diff --git a/contrib/xz/ChangeLog b/contrib/xz/ChangeLog index fca1504a7990..64c79dbce0bc 100644 --- a/contrib/xz/ChangeLog +++ b/contrib/xz/ChangeLog @@ -1,10186 +1,10890 @@ +commit c1e396a9ac1c1c28ce4ede5cbadb955c516477bc +Author: Jia Tan +Date: 2023-08-02 20:32:20 +0800 + + Bump version and soname for 5.4.4. + + src/liblzma/Makefile.am | 2 +- + src/liblzma/api/lzma/version.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +commit 7d266d25ae323a2dc5f2e254c991ef84b997adad +Author: Jia Tan +Date: 2023-08-02 20:30:07 +0800 + + Add NEWS for 5.4.4. + + NEWS | 43 +++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 43 insertions(+) + +commit 6a1093c0004c42eeaef312456c295671496dd67a +Author: Lasse Collin +Date: 2023-08-02 15:19:43 +0300 + + build-aux/manconv.sh: Fix US-ASCII and UTF-8 output. + + groff defaults to SGR escapes. Using -P-c passes -c to grotty + which restores the old behavior. Perhaps there is a better way to + get pure plain text output but this works for now. + + build-aux/manconv.sh | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +commit f6f9f5651a7e2d89f588981533155ab850e063f7 +Author: Lasse Collin +Date: 2023-08-01 19:10:43 +0300 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit 0dd83ba8767dad722e0f0f94e0e4146e290a28cb +Author: Lasse Collin +Date: 2023-08-01 18:22:24 +0300 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit 4170a80785cc69edfc5fdfe498bed5f40db6a7ef +Author: ChanTsune <41658782+ChanTsune@users.noreply.github.com> +Date: 2023-08-01 18:17:17 +0300 + + mythread.h: Disable signal functions in builds targeting Wasm + WASI. + + signal.h in WASI SDK doesn't currently provide sigprocmask() + or sigset_t. liblzma doesn't need them so this change makes + liblzma and xzdec build against WASI SDK. xz doesn't build yet + and the tests don't either as tuktest needs setjmp() which + isn't (yet?) implemented in WASI SDK. 
+ + Closes: https://github.com/tukaani-project/xz/pull/57 + See also: https://github.com/tukaani-project/xz/pull/56 + + (The original commit was edited a little by Lasse Collin.) + + src/common/mythread.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 0db6fbe0be1545a4f25fcd6993371155b37bbb26 +Author: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> +Date: 2023-07-31 14:02:21 +0200 + + Docs: Fix typos found by codespell + + CMakeLists.txt | 4 ++-- + NEWS | 2 +- + configure.ac | 2 +- + src/liblzma/api/lzma/container.h | 4 ++-- + src/liblzma/api/lzma/filter.h | 2 +- + src/liblzma/api/lzma/lzma12.h | 4 ++-- + src/liblzma/common/block_buffer_encoder.c | 2 +- + src/liblzma/common/common.h | 2 +- + src/liblzma/common/file_info.c | 2 +- + src/liblzma/common/lzip_decoder.c | 2 +- + src/liblzma/common/stream_decoder_mt.c | 8 ++++---- + src/liblzma/common/string_conversion.c | 6 +++--- + src/liblzma/lz/lz_encoder.h | 2 +- + src/liblzma/lzma/lzma_encoder.c | 4 ++-- + src/xz/hardware.c | 4 ++-- + tests/test_filter_flags.c | 4 ++-- + tests/test_index.c | 2 +- + tests/test_vli.c | 2 +- + 18 files changed, 29 insertions(+), 29 deletions(-) + +commit 84a3a1f4e4f146b3b5fa73fe708cacb0d9f829f3 +Author: Jia Tan +Date: 2023-07-26 20:26:23 +0800 + + Update .gitignore. + + .gitignore | 4 ++++ + 1 file changed, 4 insertions(+) + +commit 962b3d41e0c27355ba3052ef7b7d9a887de807e5 +Author: Jia Tan +Date: 2023-07-28 22:03:08 +0800 + + CMake: Conditionally allow the creation of broken symlinks. + + The CMake build will try to create broken symlinks on Unix and Unix-like + platforms. Cygwin and MSYS2 are Unix-like, but may not be able to create + broken symlinks. The value of the CYGWIN or MSYS environment variables + determine if broken symlinks are valid. + + The default for MSYS2 does not allow for broken symlinks, so the CMake + build has been broken for MSYS2 since commit + 80a1a8bb838842a2be343bd88ad1462c21c5e2c9. 
+ + CMakeLists.txt | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 75 insertions(+), 7 deletions(-) + +commit 19899340cf74d98304f9f5b726c72e85c7017d72 +Author: Jia Tan +Date: 2023-07-24 21:43:44 +0800 + + liblzma: Prevent an empty translation unit in Windows builds. + + To workaround Automake lacking Windows resource compiler support, an + empty source file is compiled to overwrite the resource files for static + library builds. Translation units without an external declaration are + not allowed by the C standard and result in a warning when used with + -Wempty-translation-unit (Clang) or -pedantic (GCC). + + src/liblzma/Makefile.am | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +commit ef7fbe83937abd7f55f1637c1d08b559f1d3d6cc +Author: Jia Tan +Date: 2023-07-21 18:05:44 +0800 + + Tests: Skip .lz files in test_files.sh if not configured. + + Previously if the lzip decoder was not configured then test_files.sh + would pass the lzip tests instead of skipping them. + + tests/test_files.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit e49815ba7aa610b72e8724025eceee23c27e7b23 +Author: Jia Tan +Date: 2023-07-20 22:11:13 +0800 + + Tests: Add ARM64 filter test to test_compress.sh. + + tests/test_compress.sh | 1 + + 1 file changed, 1 insertion(+) + +commit db5b57b827c2b084586c51da1b8a3ab0984bdb40 +Author: Jia Tan +Date: 2023-07-22 18:37:56 +0800 + + Translations: Update the Vietnamese translation. + + po/vi.po | 45 ++++++++++++++++++++++++++++----------------- + 1 file changed, 28 insertions(+), 17 deletions(-) + +commit e6ba5014081c98cb068253b3e5f9e372dd157f3c +Author: Jia Tan +Date: 2023-07-20 20:30:05 +0800 + + Translations: Update the Croatian translation. 
+ + po/hr.po | 49 ++++++++++++++++++++++++++++++------------------- + 1 file changed, 30 insertions(+), 19 deletions(-) + +commit 82657a1f10a25050a3e9c246c41deb785cccba3a +Author: Jia Tan +Date: 2023-07-20 20:28:32 +0800 + + Translations: Update the Korean man page translations. + + po4a/ko.po | 1255 ++++++++++++++++++++++++++++++------------------------------ + 1 file changed, 629 insertions(+), 626 deletions(-) + +commit fab35907d13efbe7c457e00a310123335d7bae3e +Author: Jia Tan +Date: 2023-07-20 20:25:24 +0800 + + Translations: Update the Korean translation. + + po/ko.po | 45 ++++++++++++++++++++++++++++----------------- + 1 file changed, 28 insertions(+), 17 deletions(-) + +commit f530fe1964c34797fa3b775ec5804d68007c1579 +Author: Jia Tan +Date: 2023-07-20 20:24:05 +0800 + + Translations: Update the Polish translation. + + po/pl.po | 47 +++++++++++++++++++++++++++++------------------ + 1 file changed, 29 insertions(+), 18 deletions(-) + +commit 10b3500cc2617aabdbc802a63164ce0a3683ad2b +Author: Jia Tan +Date: 2023-07-20 20:22:23 +0800 + + Translations: Update the German man page translations. + + po4a/de.po | 1255 ++++++++++++++++++++++++++++++------------------------------ + 1 file changed, 629 insertions(+), 626 deletions(-) + +commit 3d14e8a34dba176f975326924ae28a3ab5fee165 +Author: Jia Tan +Date: 2023-07-20 20:18:44 +0800 + + Translations: Update the German translation. + + po/de.po | 47 +++++++++++++++++++++++++++++------------------ + 1 file changed, 29 insertions(+), 18 deletions(-) + +commit bdcc180a2da15039c47455c1cafd1ce9aff442e4 +Author: Jia Tan +Date: 2023-07-20 20:17:10 +0800 + + Translations: Update the Chinese (simplified) translation. + + po/zh_CN.po | 47 +++++++++++++++++++++++++++++------------------ + 1 file changed, 29 insertions(+), 18 deletions(-) + +commit 844240350d33cb46ba8827b74dc8758e2abed9e5 +Author: Jia Tan +Date: 2023-07-20 20:15:47 +0800 + + Translations: Update the Swedish translation. 
+ + po/sv.po | 47 +++++++++++++++++++++++++++++------------------ + 1 file changed, 29 insertions(+), 18 deletions(-) + +commit bdc7952bcfbc20c134f723ff5cbf84ab690fb4fc +Author: Jia Tan +Date: 2023-07-20 20:14:00 +0800 + + Translations: Update the Ukrainian man page translations. + + po4a/uk.po | 1253 ++++++++++++++++++++++++++++++------------------------------ + 1 file changed, 628 insertions(+), 625 deletions(-) + +commit 143396e070c8d8afb56e3a5f688b214222cf71e0 +Author: Jia Tan +Date: 2023-07-20 20:09:15 +0800 + + Translations: Update the Ukrainian translation. + + po/uk.po | 45 ++++++++++++++++++++++++++++----------------- + 1 file changed, 28 insertions(+), 17 deletions(-) + +commit 1b8146d64eaac51b40079f5788b85c1eb79e35de +Author: Jia Tan +Date: 2023-07-20 20:06:57 +0800 + + Translations: Update the Spanish translation. + + po/es.po | 47 +++++++++++++++++++++++++++++------------------ + 1 file changed, 29 insertions(+), 18 deletions(-) + +commit f947ce09d79ef8fa6549d350c4d72e4a8a9fa697 +Author: Jia Tan +Date: 2023-07-20 20:05:19 +0800 + + Translations: Update the Romanian translation. + + po/ro.po | 48 ++++++++++++++++++++++++++++++------------------ + 1 file changed, 30 insertions(+), 18 deletions(-) + +commit f681301a368def3f597a7c7bea7cb3f7a2f933e7 +Author: Jia Tan +Date: 2023-07-20 20:02:56 +0800 + + Translations: Update the Romanian man page translations. + + po4a/ro.po | 1254 ++++++++++++++++++++++++++++++------------------------------ + 1 file changed, 629 insertions(+), 625 deletions(-) + +commit 8bc3146c6be920b56283f4ee846456cb34ae4b6f +Author: Jia Tan +Date: 2023-07-13 21:26:47 +0800 + + xz: Update man page Authors and date. + + src/xz/xz.1 | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +commit c2905540ef905412b54c76488758712d3e2a0f9d +Author: Jia Tan +Date: 2023-07-18 23:19:09 +0800 + + xz: Slight reword in xz man page for consistency. 
+ + Changed will print => prints in xz --robot --version description to + match --robot --info-memory description. + + src/xz/xz.1 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 2600d3352449faf57d9cb3fa00cbe40548a1310f +Author: Jia Tan +Date: 2023-07-18 22:49:57 +0800 + + liblzma: Improve comment in string_conversion.c. + + The comment used "flag" when referring to decoder options. Just + referring to them as options is more clear and consistent. + + src/liblzma/common/string_conversion.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +commit 98fc14541eec3e1dcf694075c8533f19a3cee552 +Author: Jia Tan +Date: 2023-05-13 21:21:54 +0800 + + liblzma: Reword lzma_str_list_filters() documentation. + + Reword "options required" to "options read". The previous wording + may have suggested that the options listed were all required when + the filters are used for encoding or decoding. Now it should be + more clear that the options listed are the ones relevant for + encoding or decoding. + + src/liblzma/api/lzma/filter.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 1ac79b4cbaecf3f4930a5d8ce6f7e1f2b63c2a01 +Author: Lasse Collin +Date: 2023-07-18 17:37:33 +0300 + + xz: Translate the second "%s: " in message.c since French needs "%s : ". + + This string is used to print a filename when using "xz -v" and + stderr isn't a terminal. + + src/xz/message.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 97851be2c6c64494f23c3a735601c13d4387820b +Author: Lasse Collin +Date: 2023-07-18 14:35:33 +0300 + + xz: Make "%s: %s" translatable because French needs "%s : %s". + + src/xz/args.c | 5 ++++- + src/xz/coder.c | 8 ++++---- + src/xz/file_io.c | 8 ++++---- + src/xz/list.c | 11 ++++++----- + 4 files changed, 18 insertions(+), 14 deletions(-) + +commit b406828a6dfd3caa4f77efe3ff3e3eea263eee62 +Author: Lasse Collin +Date: 2023-07-18 13:57:54 +0300 + + liblzma: Tweak #if condition in memcmplen.h. 
+ + Maybe ICC always #defines _MSC_VER on Windows but now + it's very clear which code will get used. + + src/liblzma/common/memcmplen.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +commit ef4a07ad9434f81417395f6fe0bb331e027a703b +Author: Lasse Collin +Date: 2023-07-18 13:49:43 +0300 + + liblzma: Omit unnecessary parenthesis in a preprocessor directive. + + src/liblzma/common/memcmplen.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +commit 64ee0caaea06654b28afaee850fb187a11bf9cb2 +Author: Jia Tan +Date: 2023-06-28 20:22:38 +0800 + + liblzma: Prevent warning for MSYS2 Windows build. + + In lzma_memcmplen(), the <intrin.h> header file is only included if + _MSC_VER and _M_X64 are both defined but _BitScanForward64() was + previously used if _M_X64 was defined. GCC for MSYS2 defines _M_X64 but + not _MSC_VER so _BitScanForward64() was used without including + <intrin.h>. + + Now, lzma_memcmplen() will use __builtin_ctzll() for MSYS2 GCC builds as + expected. + + src/liblzma/common/memcmplen.h | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +commit ba177057643dae6e9c167ee26471181c199c8a00 +Author: Jia Tan +Date: 2023-07-14 23:20:33 +0800 + + Docs: Add a new section to INSTALL for Tests. + + The new Tests section describes basic information about the tests, how + to run them, and important details when cross compiling. We have had a + few questions about how to compile the tests without running them, so + hopefully this information will help others with the same question in the + future. + + Fixes: https://github.com/tukaani-project/xz/issues/54 + + INSTALL | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++-------------- + 1 file changed, 64 insertions(+), 17 deletions(-) + +commit 0745b900fafa5ac2c31e8ec3cfba3053457c8b1f +Author: Jia Tan +Date: 2023-07-14 21:10:27 +0800 + + Docs: Update README. + + This adds an entry to "Other implementations of the .xz format" for + XZ for Java. 
+ + README | 4 ++++ + 1 file changed, 4 insertions(+) + +commit c972d44103c4edf88e73917ef08bde69db9d06cb +Author: Jia Tan +Date: 2023-07-18 13:27:46 +0300 + + xz: Fix typo in man page. + + The Memory limit information section described three output + columns when it actually has six. This was reworded to + "multiple" to make it more future proof. + + src/xz/xz.1 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 6e21f1b4e2c580b64d258e3b81ac9e44d53b9283 +Author: Jia Tan +Date: 2023-07-14 21:30:25 +0800 + + Tests: Improve feature testing for skipping. + + Fixed a bug where test_compress_* would all fail if arm64 or armthumb + filters were enabled for compression but arm was disabled. Since the + grep tests only checked for "define HAVE_ENCODER_ARM", this would match + on HAVE_ENCODER_ARM64 or HAVE_ENCODER_ARMTHUMB. + + Now the config.h feature test requires " 1" at the end to prevent the + prefix problem. have_feature() was also updated for this even though + there were no known current bugs affecting it. This is just in case future + features have a similar prefix problem. + + tests/test_compress.sh | 4 ++-- + tests/test_files.sh | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +commit 26c37d290c0884a4518a26b949575420a77fb938 +Author: Jia Tan +Date: 2023-07-10 20:56:28 +0800 + + Translations: Update the Chinese (traditional) translation. + + po/zh_TW.po | 659 ++++++++++++++++++++++++++++++++++-------------------------- + 1 file changed, 377 insertions(+), 282 deletions(-) + +commit 2ec65181e28335692a3a4fac4fefc4303111d355 +Author: Jia Tan +Date: 2023-07-08 20:03:59 +0800 + + Translations: Update the Vietnamese translation. + + po/vi.po | 620 +++++++++++++++++++++++++++++++++++---------------------------- + 1 file changed, 349 insertions(+), 271 deletions(-) + +commit c44c7e7590db1b89c97e106f5c516bf69189e233 +Author: Jia Tan +Date: 2023-06-28 20:46:31 +0800 + + Tests: Fix memory leaks in test_index. 
+ + Several tests were missing calls to lzma_index_end() to clean up the + lzma_index structs. The memory leaks were discovered by using + -fsanitize=address with GCC. + + tests/test_index.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +commit b9499c94fccc03eba5311b5305f04149a7c5af65 +Author: Jia Tan +Date: 2023-06-28 20:43:29 +0800 + + Tests: Fix memory leaks in test_block_header. + + test_block_header was not properly freeing the filter options between + calls to lzma_block_header_decode(). The memory leaks were discovered by + using -fsanitize=address with GCC. + + tests/test_block_header.c | 38 ++++++++++++++++++++++---------------- + 1 file changed, 22 insertions(+), 16 deletions(-) + +commit 1155471651ad456c5f90aee6435931fae65682bf +Author: Jia Tan +Date: 2023-06-28 20:31:11 +0800 + + liblzma: Prevent uninitialized warning in mt stream encoder. + + This change only impacts the compiler warning since it was impossible + for the wait_abs struct in stream_encode_mt() to be used before it was + initialized since mythread_condtime_set() will always be called before + mythread_cond_timedwait(). + + Since the mythread.h code is different between the POSIX and + Windows versions, this warning was only present on Windows builds. + + Thanks to Arthur S for reporting the warning and providing an initial + patch. + + src/liblzma/common/stream_encoder_mt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 62dd1c9bf0c66679ceeebc8b99bca136ca43bcb9 +Author: Jia Tan +Date: 2023-06-07 00:10:38 +0800 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit d04eb78ab3892d3678c180437dd66379efa051fd +Author: Benjamin Buch +Date: 2023-06-06 15:32:45 +0200 + + CMake: Protects against double find_package + + Boost iostream uses `find_package` in quiet mode and then again uses + `find_package` with required. 
This second call triggers a + `add_library cannot create imported target "LibLZMA::LibLZMA" + because another target with the same name already exists.` + + This can simply be fixed by skipping the alias part on secondary + `find_package` runs. + + CMakeLists.txt | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +commit 12ea1fb29763dd72376aca73d69621aff01eaa7f +Author: Jia Tan +Date: 2023-05-31 20:26:42 +0800 + + Translations: Update the Esperanto translation. + + po/eo.po | 185 +++++++++++++++++++++++++++++++-------------------------------- + 1 file changed, 92 insertions(+), 93 deletions(-) + +commit a4d45c07c1e4aa0ea9410b4eed7301c97e960319 +Author: Jia Tan +Date: 2023-05-31 20:25:00 +0800 + + Translations: Update the Croatian translation. + + po/hr.po | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit f51b7bcea6f948db9529b3e6d850e76a30cc0e96 +Author: Jia Tan +Date: 2023-05-31 20:15:53 +0800 + + Translations: Update the Chinese (simplified) translation. + + po/zh_CN.po | 317 ++++++++++++++++++++++++++++++------------------------------ + 1 file changed, 157 insertions(+), 160 deletions(-) + +commit d5b81c19a7594fbb913bb3450360bc2145f29568 +Author: Jia Tan +Date: 2023-05-17 23:12:13 +0800 + + Translations: Update German translation of man pages. + + po4a/de.po | 52 ++++++++++++---------------------------------------- + 1 file changed, 12 insertions(+), 40 deletions(-) + +commit 36860a3e30d5675b4dba3a343159458d8864bceb +Author: Jia Tan +Date: 2023-05-17 23:09:18 +0800 + + Translations: Update the German translation. + + po/de.po | 189 +++++++++++++++++++++++++++++++-------------------------------- + 1 file changed, 94 insertions(+), 95 deletions(-) + +commit f05641ef0d4990c4c58f459039d412475b94b37d +Author: Jia Tan +Date: 2023-05-17 20:30:01 +0800 + + Translations: Update the Croatian translation. 
+ + po/hr.po | 187 +++++++++++++++++++++++++++++++-------------------------------- + 1 file changed, 93 insertions(+), 94 deletions(-) + +commit b852f6f11d43cafce871cd158c80488f86619a63 +Author: Jia Tan +Date: 2023-05-17 20:26:54 +0800 + + Translations: Update Korean translation of man pages. + + po4a/ko.po | 3015 ++++++++++++------------------------------------------------ + 1 file changed, 568 insertions(+), 2447 deletions(-) + +commit fdcb78fb6f5313eda363869690685471d7ab9731 +Author: Jia Tan +Date: 2023-05-17 20:13:01 +0800 + + Translations: Update the Korean translation. + + po/ko.po | 319 +++++++++++++++++++++++++++++++-------------------------------- + 1 file changed, 158 insertions(+), 161 deletions(-) + +commit 9e39cebe3ccdd4bc320ce6e7eea989bdd167c8b2 +Author: Jia Tan +Date: 2023-05-16 23:49:09 +0800 + + Translations: Update the Spanish translation. + + po/es.po | 319 +++++++++++++++++++++++++++++++-------------------------------- + 1 file changed, 158 insertions(+), 161 deletions(-) + +commit 7a578d1d8293fc1cf7c6f9df76bc6a0e55c51a14 +Author: Jia Tan +Date: 2023-05-16 23:47:23 +0800 + + Translations: Update the Romanian translation. + + po/ro.po | 195 ++++++++++++++++++++++++++++++++------------------------------- + 1 file changed, 98 insertions(+), 97 deletions(-) + +commit 362320fea5383bae179763763625532bad6d10e8 +Author: Jia Tan +Date: 2023-05-16 23:45:43 +0800 + + Translations: Update Romanian translation of man pages. + + po4a/ro.po | 19 ++++++++++--------- + 1 file changed, 10 insertions(+), 9 deletions(-) + +commit 29c5a870be23d0a565b959404b34a8b1babef7df +Author: Jia Tan +Date: 2023-05-16 23:43:51 +0800 + + Translations: Update Ukrainian translation of man pages. + + po4a/uk.po | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +commit 5ee57572773207371712b1bbcd9b5b8c1baad2cc +Author: Jia Tan +Date: 2023-05-16 23:37:54 +0800 + + Translations: Update the Ukrainian translation. 
+ + po/uk.po | 321 +++++++++++++++++++++++++++++++-------------------------------- + 1 file changed, 159 insertions(+), 162 deletions(-) + +commit dfc257bdb224780d3c8cdeca5a8e52848b9adc0a +Author: Jia Tan +Date: 2023-05-16 23:07:35 +0800 + + Translations: Update the Polish translation. + + po/pl.po | 316 +++++++++++++++++++++++++++++++-------------------------------- + 1 file changed, 155 insertions(+), 161 deletions(-) + +commit f6df4c4ab7378d2971ae1606f4eecc011ea5fc6d +Author: Jia Tan +Date: 2023-05-16 22:52:14 +0800 + + Translations: Update the Swedish translation. + + po/sv.po | 319 +++++++++++++++++++++++++++++++-------------------------------- + 1 file changed, 158 insertions(+), 161 deletions(-) + +commit 25da22e0521ec698dae4eb5e994b4cd3f4708b60 +Author: Jia Tan +Date: 2023-05-16 21:21:38 +0800 + + Translations: Update the Esperanto translation. + + po/eo.po | 34 +++++++++++++++++----------------- + 1 file changed, 17 insertions(+), 17 deletions(-) + +commit 4f57a9c9915b7b9267885cf9fca58e5c8208458d +Author: Jia Tan +Date: 2023-05-11 23:49:23 +0800 + + liblzma: Adds lzma_nothrow to MicroLZMA API functions. + + None of the liblzma functions may throw an exception, so this + attribute should be applied to all liblzma API functions. + + src/liblzma/api/lzma/container.h | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + commit 238b4e5458b4bd2cadefb768b8ea7c6b70a191ac Author: Jia Tan Date: 2023-05-04 20:38:52 +0800 Translations: Update the Croatian translation. po/hr.po | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit 0cee63c3c6c4f9084a5e7fa7c15a1863ac8e5ffc Author: Jia Tan Date: 2023-05-04 19:50:42 +0800 Bump version and soname for 5.4.3. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 01f937ea9a49005c5091c0165fa0c1e572dd1749 Author: Jia Tan Date: 2023-05-02 20:39:56 +0800 Add NEWS for 5.4.3. 
NEWS | 10 ++++++++++ 1 file changed, 10 insertions(+) commit e9b9ea953179502d5245b4e6ca4f5ffe9d5797b1 Author: Lasse Collin Date: 2023-05-03 22:46:42 +0300 tuklib_integer.h: Fix a recent copypaste error in Clang detection. Wrong line was changed in 7062348bf35c1e4cbfee00ad9fffb4a21aa6eff7. Also, this has >= instead of == since ints larger than 32 bits would work too even if not relevant in practice. src/common/tuklib_integer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit be6e39a8622f0d905072874ec25b1ffd155b12e1 Author: Jia Tan Date: 2023-04-20 20:15:00 +0800 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 9e343a46cf87a345799222c0b0b3a6f3358dde0c Author: Jia Tan Date: 2023-04-19 22:22:16 +0800 Windows: Include <intrin.h> when needed. Legacy Windows did not need to #include <intrin.h> to use the MSVC intrinsics. Newer versions likely just issue a warning, but the MSVC documentation says to include the header file for the intrinsics we use. GCC and Clang can "pretend" to be MSVC on Windows, so extra checks are needed in tuklib_integer.h to only include <intrin.h> when it is actually needed. src/common/tuklib_integer.h | 6 ++++++ src/liblzma/common/memcmplen.h | 10 ++++++++++ 2 files changed, 16 insertions(+) commit 12321a9390acc076b414035a46df9d7545ac379f Author: Jia Tan Date: 2023-04-19 21:59:03 +0800 tuklib_integer: Use __builtin_clz() with Clang. Clang has support for __builtin_clz(), but previously Clang would fallback to either the MSVC intrinsic or the regular C code. This was discovered due to a bug where a new version of Clang required the <intrin.h> header file in order to use the MSVC intrinsics. Thanks to Anton Kochkov for notifying us about the bug. src/common/tuklib_integer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit d1f0e01c395960efd6c29ff144eda4f4c8c1c6d3 Author: Lasse Collin Date: 2023-04-14 18:42:33 +0300 liblzma: Update project maintainers in lzma.h. AUTHORS was updated earlier, lzma.h was simply forgotten. 
src/liblzma/api/lzma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 8204c5d1304e1e3b7487801b4acaf8e3179a6c52 Author: Jia Tan Date: 2023-04-13 20:45:19 +0800 liblzma: Cleans up old commented out code. src/liblzma/common/alone_encoder.c | 11 ----------- 1 file changed, 11 deletions(-) commit 32980d1562bc031013306be1c787761fa63da87d Author: Jia Tan Date: 2023-03-28 22:32:40 +0800 CMake: Update liblzma-config.cmake generation. Now that the threading is configurable, the liblzma CMake package only needs the threading library when using POSIX threads. CMakeLists.txt | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) commit 023907faa965476fb921a6344253c50e0ba795f9 Author: Jia Tan Date: 2023-03-28 22:25:33 +0800 CMake: Allows setting thread method. The thread method is now configurable for the CMake build. It matches the Autotools build by allowing ON (pick the best threading method), OFF (no threading), posix, win95, and vista. If both Windows and posix threading are both available, then ON will choose Windows threading. Windows threading will also not use: target_link_libraries(liblzma Threads::Threads) since on systems like MinGW-w64 it would link the posix threads without purpose. CMakeLists.txt | 144 +++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 104 insertions(+), 40 deletions(-) commit ba176d77cbb164b13006516660fc1ff6c59dedb5 Author: Jia Tan Date: 2023-03-24 20:05:59 +0800 CMake: Only build xzdec if decoders are enabled. CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit c99d697df800e9bc47e09facf88bcfdb9e0264a5 Author: Jia Tan Date: 2023-03-22 15:42:04 +0800 Build: Removes redundant check for LZMA1 filter support. src/liblzma/lzma/Makefile.inc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) commit 54a2fd8c850e626343b79c4cc8fcac2f638ec890 Author: Lasse Collin Date: 2023-03-23 15:14:29 +0200 CMake: Bump maximum policy version to 3.26. 
It adds only one new policy related to FOLDERS which we don't use. This makes it clear that the code is compatible with the policies up to 3.26. CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 622d0fdc02d5f565c2872d79bc70c669c95c8284 Author: Jia Tan Date: 2023-03-21 23:36:00 +0800 CMake: Conditionally build xz list.* files if decoders are enabled. CMakeLists.txt | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) commit 3e2dc523c596cd770a6a7c58cacc0d0d8761e853 Author: Jia Tan Date: 2023-02-25 11:46:50 +0800 CMake: Allow configuring features as cache variables. This allows users to change the features they build either in CMakeCache.txt or by using a CMake GUI. The sources built for liblzma are affected by this too, so only the necessary files will be compiled. CMakeLists.txt | 528 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 391 insertions(+), 137 deletions(-) commit 0c053f97337fa4ead77acefb577e8e86f8ef388d Author: Lasse Collin Date: 2023-03-21 14:07:51 +0200 Build: Add a comment that AC_PROG_CC_C99 is needed for Autoconf 2.69. It's obsolete in Autoconf >= 2.70 and just an alias for AC_PROG_CC but Autoconf 2.69 requires AC_PROG_CC_C99 to get a C99 compiler. configure.ac | 3 +++ 1 file changed, 3 insertions(+) commit 915d4f6058d52f84d2b58b0a5f3c8272eaa6c1bd Author: Lasse Collin Date: 2023-03-21 14:04:37 +0200 Build: configure.ac: Use AS_IF and AS_CASE where required. This makes no functional difference in the generated configure (at least with the Autotools versions I have installed) but this change might prevent future bugs like the one that was just fixed in the commit 5a5bd7f871818029d5ccbe189f087f591258c294. configure.ac | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) commit b848c039586ed2cddb6fb033680dac107cc5143f Author: Lasse Collin Date: 2023-03-21 13:12:03 +0200 Update THANKS. 
THANKS | 1 + 1 file changed, 1 insertion(+) commit c775ba1602a74f29dbc2088bbe4d02c23fc32ba1 Author: Lasse Collin Date: 2023-03-21 13:11:49 +0200 Build: Fix --disable-threads breaking the building of shared libs. This is broken in the releases 5.2.6 to 5.4.2. A workaround for these releases is to pass EGREP='grep -E' as an argument to configure in addition to --disable-threads. The problem appeared when m4/ax_pthread.m4 was updated in the commit 6629ed929cc7d45a11e385f357ab58ec15e7e4ad which introduced the use of AC_EGREP_CPP. AC_EGREP_CPP calls AC_REQUIRE([AC_PROG_EGREP]) to set the shell variable EGREP but this was only executed if POSIX threads were enabled. Libtool code also has AC_REQUIRE([AC_PROG_EGREP]) but Autoconf omits it as AC_PROG_EGREP has already been required earlier. Thus, if not using POSIX threads, the shell variable EGREP would be undefined in the Libtool code in configure. ax_pthread.m4 is fine. The bug was in configure.ac which called AX_PTHREAD conditionally in an incorrect way. Using AS_CASE ensures that all AC_REQUIREs get always run. Thanks to Frank Busse for reporting the bug. Fixes: https://github.com/tukaani-project/xz/issues/45 configure.ac | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) commit 0673c9ec98b6bae12b33dc295564514aaa26e2fc Author: Lasse Collin Date: 2023-03-19 22:45:59 +0200 liblzma: Silence -Wsign-conversion in SSE2 code in memcmplen.h. Thanks to Christian Hesse for reporting the issue. Fixes: https://github.com/tukaani-project/xz/issues/44 src/liblzma/common/memcmplen.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 6ca8046ecbc7a1c81ee08f544bfd1414819fb2e8 Author: Jia Tan Date: 2023-03-18 23:22:06 +0800 Bump version and soname for 5.4.2. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit ce4f246600db10e77fc93d492ea045b4c9830bc6 Author: Jia Tan Date: 2023-03-18 22:10:57 +0800 Add NEWS for 5.4.2. 
NEWS | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) commit 3634fe330788fc0df217acdb6581031a851600a3 Author: Lasse Collin Date: 2023-03-18 16:00:54 +0200 Update the copy of GNU GPLv3 from gnu.org to COPYING.GPLv3. COPYING.GPLv3 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) commit 97679d25ce7cb014328f1455bb338903c003d54f Author: Lasse Collin Date: 2023-03-18 15:51:57 +0200 Change a few HTTP URLs to HTTPS. The xz man page timestamp was intentionally left unchanged. INSTALL | 2 +- README | 8 ++++---- configure.ac | 2 +- dos/INSTALL.txt | 4 ++-- src/liblzma/api/lzma.h | 8 ++++---- src/liblzma/check/sha256.c | 2 +- src/xz/xz.1 | 2 +- windows/INSTALL-MinGW.txt | 10 +++++----- 8 files changed, 19 insertions(+), 19 deletions(-) commit 01a67e89a56c3ebf5f6681bd06f0edc22a1ae594 Author: Jia Tan Date: 2023-03-18 00:40:28 +0800 CMake: Fix typo in a comment. CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 5dca3f5cbab31105196c89cd6df0a9bbc3307c05 Author: Lasse Collin Date: 2023-03-17 18:36:22 +0200 Windows: build.bash: Copy liblzma API docs to the output package. windows/build.bash | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit ae252862b30b509ab88b2bbcaa08e8d51b57e928 Author: Lasse Collin Date: 2023-03-17 08:53:38 +0200 Windows: Add microlzma_*.c to the VS project files. These should have been included in 5.3.2alpha already. windows/vs2013/liblzma.vcxproj | 2 ++ windows/vs2013/liblzma_dll.vcxproj | 2 ++ windows/vs2017/liblzma.vcxproj | 2 ++ windows/vs2017/liblzma_dll.vcxproj | 2 ++ windows/vs2019/liblzma.vcxproj | 2 ++ windows/vs2019/liblzma_dll.vcxproj | 2 ++ 6 files changed, 12 insertions(+) commit 147d282cc3733e2723df82622b6540ddfb52635e Author: Lasse Collin Date: 2023-03-17 08:43:51 +0200 CMake: Add microlzma_*.c to the build. These should have been included in 5.3.2alpha already. 
CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) commit 4523a5ee29f45c0256af67a15771bc8bbd25ba53 Author: Lasse Collin Date: 2023-03-17 08:41:36 +0200 Build: Update comments about unaligned access to mention 64-bit. cmake/tuklib_integer.cmake | 7 +++---- m4/tuklib_integer.m4 | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) commit 82aacb40367dc580b09c5a7b270f6c98b63e49b9 Author: Lasse Collin Date: 2023-03-17 00:02:30 +0200 Tests: Update .gitignore. .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 5d022c24921eec938604a8cb10d70aa08dbd8575 Author: Lasse Collin Date: 2023-03-14 20:04:03 +0200 po4a/update-po: Display the script name consistently in error messages. po4a/update-po | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 896295117324f323c0b8db6a31ad6ebfaa88793d Author: Jia Tan Date: 2023-03-17 01:30:36 +0800 Doc: Rename Doxygen HTML doc directory name liblzma => api. When the docs are installed, calling the directory "liblzma" is confusing since multiple other files in the doc directory are for liblzma. This should also make it more natural for distros when they package the documentation. .gitignore | 2 +- Makefile.am | 18 +++++++++--------- PACKAGERS | 4 ++-- doxygen/Doxyfile | 2 +- doxygen/update-doxygen | 18 +++++++++--------- 5 files changed, 22 insertions(+), 22 deletions(-) commit 94097157fae308b2c1a5edb4e8597c68b45eeaea Author: Jia Tan Date: 2023-03-16 22:07:15 +0800 liblzma: Remove note from lzma_options_bcj about the ARM64 exception. This was left in by mistake since an early version of the ARM64 filter used a different struct for its options. src/liblzma/api/lzma/bcj.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit da16d0b73b79d7785ece6f78a577fadd1fb59d0e Author: Lasse Collin Date: 2023-03-15 19:19:13 +0200 COPYING: Add a note about the included Doxygen-generated HTML. 
COPYING | 11 +++++++++++ 1 file changed, 11 insertions(+) commit e57c74f9ef19201f72c106f2c347596f40229936 Author: Jia Tan Date: 2023-03-16 21:41:09 +0800 Doc: Update PACKAGERS with details about liblzma API docs install. PACKAGERS | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) commit 7e2fa48bb73edb25457232e2e62a4f81c6b95281 Author: Jia Tan Date: 2023-03-16 21:38:32 +0800 liblzma: Add set lzma.h as the main page for Doxygen documentation. The \mainpage command is used in the first block of comments in lzma.h. This changes the previously nearly empty index.html to use the first comment block in lzma.h for its contents. lzma.h is no longer documented separately, but this is for the better since lzma.h only defined a few macros that users do not need to use. The individual API header files all have a disclaimer that they should not be #included directly, so there should be no confusion on the fact that lzma.h should be the only header used by applications. Additionally, the note "See ../lzma.h for information about liblzma as a whole." was removed since lzma.h is now the main page of the generated HTML and does not have its own page anymore. So it would be confusing in the HTML version and was only a "nice to have" when browsing the source files. 
src/liblzma/api/lzma.h | 1 + src/liblzma/api/lzma/base.h | 2 -- src/liblzma/api/lzma/bcj.h | 2 -- src/liblzma/api/lzma/block.h | 2 -- src/liblzma/api/lzma/check.h | 2 -- src/liblzma/api/lzma/container.h | 2 -- src/liblzma/api/lzma/delta.h | 2 -- src/liblzma/api/lzma/filter.h | 2 -- src/liblzma/api/lzma/hardware.h | 2 -- src/liblzma/api/lzma/index.h | 2 -- src/liblzma/api/lzma/index_hash.h | 4 +--- src/liblzma/api/lzma/lzma12.h | 2 -- src/liblzma/api/lzma/stream_flags.h | 2 -- src/liblzma/api/lzma/version.h | 2 -- src/liblzma/api/lzma/vli.h | 2 -- 15 files changed, 2 insertions(+), 29 deletions(-) commit d42977c7acfdf7fd9acc8803cf411eca7dc10478 Author: Jia Tan Date: 2023-03-16 21:37:32 +0800 Build: Generate doxygen documentation in autogen.sh. Another command line option (--no-doxygen) was added to disable creating the doxygen documentation in cases where it is not wanted or if the doxygen tool is not installed. autogen.sh | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) commit 8fc712fcf41d821069c670f22b8bf628e7a4a877 Author: Jia Tan Date: 2023-03-16 21:35:55 +0800 Build: Create doxygen/update-doxygen script. This is a helper script to generate the Doxygen documentation. It can be run in 'liblzma' or 'internal' mode by setting the first argument. It will default to 'liblzma' mode and only generate documentation for the liblzma API header files. The helper script will be run during the custom mydist hook when we create releases. This hook already alters the source directory, so it's fine to do it here too. This way, we can include the Doxygen generated files in the distribution and when installing. In 'liblzma' mode, the JavaScript is stripped from the .html files and the .js files are removed. This avoids license hassle from jQuery and other libraries that Doxygen 1.9.6 puts into jquery.js in minified form.
Makefile.am | 1 + doxygen/update-doxygen | 111 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) commit 77d2c025d19fbac7277c63239cfb1338f02c66c0 Author: Jia Tan Date: 2023-03-16 21:34:36 +0800 Build: Install Doxygen docs and include in distribution if generated. Added a install-data-local target to install the Doxygen documentation only when it has been generated. In order to correctly remove the docs, a corresponding uninstall-local target was added. If the doxygen docs exist in the source tree, they will also be included in the distribution now too. Makefile.am | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) commit 9af8f6f947a43974a4ff025f4c896f2fb4c91b4f Author: Jia Tan Date: 2023-01-03 20:37:30 +0800 Doxygen: Refactor Doxyfile.in to doxygen/Doxyfile. Instead of having Doxyfile.in configured by Autoconf, the Doxyfile can have the tags that need to be configured piped into the doxygen command through stdin with the overrides after Doxyfile's contents. Going forward, the documentation should be generated in two different modes: liblzma or internal. liblzma is useful for most users. It is the documentation for just the liblzma API header files. This is the default. internal is for people who want to understand how xz and liblzma work. It might be useful for people who want to contribute to the project. .gitignore | 3 +- Doxyfile.in | 1234 ------------------------- Makefile.am | 1 - configure.ac | 1 - doxygen/Doxyfile | 2684 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 2686 insertions(+), 1237 deletions(-) commit 1c558a26928b753fcf1c0d4bce9c0643285edd86 Author: Jia Tan Date: 2023-02-28 23:22:36 +0800 Tests: Remove unused macros and functions. tests/tests.h | 75 ----------------------------------------------------------- 1 file changed, 75 deletions(-) commit 7479a69a45a1b8fdb9a209e11f247dce11ac1ba0 Author: Jia Tan Date: 2023-01-12 22:29:07 +0800 Tests: Refactors existing lzma_index tests. 
Converts the existing lzma_index tests into tuktests and covers every API function from index.h except for lzma_file_info_decoder, which can be tested in the future. tests/test_index.c | 2036 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 1492 insertions(+), 544 deletions(-) commit fd56d5353360279c10e8e5e05f5bc6ea03f64584 Author: Lasse Collin Date: 2023-03-07 19:59:23 +0200 xz: Make Capsicum sandbox more strict with stdin and stdout. src/xz/file_io.c | 8 ++++++++ 1 file changed, 8 insertions(+) commit d1bdaaebc68cae7f0ba457fa990b520df2186fd1 Author: Lasse Collin Date: 2023-03-11 19:31:40 +0200 xz: Don't fail if Capsicum is enabled but kernel doesn't support it. (This commit combines related commits from the master branch.) If Capsicum support is missing from the kernel or xz is being run in an emulator that lacks Capsicum support, the syscalls will fail and set errno to ENOSYS. Previously xz would display an error and exit, making xz unusable. Now it will check for ENOSYS and run without sandbox support. Other tools like ssh behave similarly. Displaying a warning for missing Capsicum support was considered but such extra output would quickly become annoying. It would also break test_scripts.sh in "make check". Also move cap_enter() to be the first step instead of the last one. This matches the example in the cap_rights_limit(2) man page. With the current code it shouldn't make any practical difference though. Thanks to Xin Li for the bug report, suggesting a fix, and testing: https://github.com/tukaani-project/xz/pull/43 Thanks to Jia Tan for most of the original commits. src/xz/file_io.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) commit 5d351c69c19d212ddd2cf1f3bdb24900820c6776 Author: Jia Tan Date: 2023-02-04 21:06:35 +0800 Build: Adjust CMake version search regex. Now, the LZMA_VERSION_MAJOR, LZMA_VERSION_MINOR, and LZMA_VERSION_PATCH macros do not need to be on consecutive lines in version.h.
They can be separated by more whitespace, comments, or even other content, as long as they appear in the proper order (major, minor, patch). CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) commit b82d4831e3f2339c4cdbc47776c99462088c11b7 Author: Jia Tan Date: 2023-01-26 09:50:21 +0800 liblzma: Improve documentation for version.h. Specified parameter and return values for API functions and documented a few more of the macros. src/liblzma/api/lzma/version.h | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) commit 2caba3efe3e0c2d7c6dca00e22c56812326a34e6 Author: Jia Tan Date: 2023-02-24 23:46:23 +0800 liblzma: Clarify lzma_lzma_preset() documentation in lzma12.h. lzma_lzma_preset() does not guarantee that the lzma_options_lzma are usable in an encoder even if it returns false (success). If liblzma is built with default configurations, then the options will always be usable. However if the match finders hc3, hc4, or bt4 are disabled, then the options may not be usable depending on the preset level requested. The documentation was updated to reflect this complexity, since this behavior was unclear before. src/liblzma/api/lzma/lzma12.h | 5 +++++ 1 file changed, 5 insertions(+) commit 594f904673ba55a833adfa60bbab6b60d4902d08 Author: Lasse Collin Date: 2023-02-27 18:38:35 +0200 CMake: Require that the C compiler supports C99 or a newer standard. Thanks to autoantwort for reporting the issue and suggesting a different patch: https://github.com/tukaani-project/xz/pull/42 CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) commit 88101143cb774bca6b7006c8335b09dc3f334140 Author: Jia Tan Date: 2023-02-24 18:10:37 +0800 Tests: Small tweak to test-vli.c. The static global variables can be disabled if encoders and decoders are not built. If they are not disabled and -Werror is used, it will cause an unused warning as an error.
tests/test_vli.c | 2 ++ 1 file changed, 2 insertions(+) commit 4042dbf03a06e019ccdd1e9d1b94cf613d62d5a9 Author: Jia Tan Date: 2023-02-06 21:46:43 +0800 liblzma: Replace '\n' -> newline in filter.h documentation. The '\n' renders as a newline when the comments are converted to html by Doxygen. src/liblzma/api/lzma/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 3971f5c5024750ce8286891c6f79ae1661047483 Author: Jia Tan Date: 2023-02-06 21:45:37 +0800 liblzma: Shorten return description for two functions in filter.h. Shorten the description for lzma_raw_encoder_memusage() and lzma_raw_decoder_memusage(). src/liblzma/api/lzma/filter.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) commit 5e61b39432752b6cd1a7b518f0f9e4f0c3f0f242 Author: Jia Tan Date: 2023-02-06 21:44:45 +0800 liblzma: Reword a few lines in filter.h src/liblzma/api/lzma/filter.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) commit 8a53533869defa1191b41f176a0312cb53a139e2 Author: Jia Tan Date: 2023-02-06 21:35:06 +0800 liblzma: Improve documentation in filter.h. All functions now explicitly specify parameter and return values. The notes and code annotations were moved before the parameter and return value descriptions for consistency. Also, the description above lzma_filter_encoder_is_supported() about not being able to list available filters was removed since lzma_str_list_filters() will do this. src/liblzma/api/lzma/filter.h | 226 ++++++++++++++++++++++++++---------------- 1 file changed, 143 insertions(+), 83 deletions(-) commit 6d05b08b17ac8cb93165ee8f310fdd925b0b258f Author: Lasse Collin Date: 2023-02-23 20:46:16 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit dfc9a54082e0fc7b3b796ea15336b5075acc79d5 Author: Lasse Collin Date: 2023-02-21 22:57:10 +0200 liblzma: Avoid null pointer + 0 (undefined behavior in C). In the C99 and C17 standards, section 6.5.6 paragraph 8 means that adding 0 to a null pointer is undefined behavior. 
As of writing, "clang -fsanitize=undefined" (Clang 15) diagnoses this. However, I'm not aware of any compiler that would take advantage of this when optimizing (Clang 15 included). It's good to avoid this anyway since compilers might some day infer that pointer arithmetic implies that the pointer is not NULL. That is, the following foo() would then unconditionally return 0, even for foo(NULL, 0): void bar(char *a, char *b); int foo(char *a, size_t n) { bar(a, a + n); return a == NULL; } In contrast to C, C++ explicitly allows null pointer + 0. So if the above is compiled as C++ then there is no undefined behavior in the foo(NULL, 0) call. To me it seems that changing the C standard would be the sane thing to do (just add one sentence) as it would ensure that a huge amount of old code won't break in the future. Based on web searches it seems that a large number of codebases (where null pointer + 0 occurs) are being fixed instead to be future-proof in case compilers will some day optimize based on it (like making the above foo(NULL, 0) return 0) which in the worst case will cause security bugs. Some projects don't plan to change it. For example, gnulib and thus many GNU tools currently require that null pointer + 0 is defined: https://lists.gnu.org/archive/html/bug-gnulib/2021-11/msg00000.html https://www.gnu.org/software/gnulib/manual/html_node/Other-portability-assumptions.html In XZ Utils null pointer + 0 issue should be fixed after this commit. This adds a few if-statements and thus branches to avoid null pointer + 0. These check for size > 0 instead of ptr != NULL because this way bugs where size > 0 && ptr == NULL will likely get caught quickly. None of them are in hot spots so it shouldn't matter for performance. A little less readable version would be replacing ptr + offset with offset != 0 ? ptr + offset : ptr or creating a macro for it: #define my_ptr_add(ptr, offset) \ ((offset) != 0 ? 
((ptr) + (offset)) : (ptr)) Checking for offset != 0 instead of ptr != NULL allows GCC >= 8.1, Clang >= 7, and Clang-based ICX to optimize it to the very same code as ptr + offset. That is, it won't create a branch. So for hot code this could be a good solution to avoid null pointer + 0. Unfortunately other compilers like ICC 2021 or MSVC 19.33 (VS2022) will create a branch from my_ptr_add(). Thanks to Marcin Kowalczyk for reporting the problem: https://github.com/tukaani-project/xz/issues/36 src/liblzma/common/block_decoder.c | 5 ++++- src/liblzma/common/block_encoder.c | 7 +++++-- src/liblzma/common/common.c | 20 ++++++++++++++------ src/liblzma/common/index_decoder.c | 13 ++++++++++--- src/liblzma/common/index_encoder.c | 11 +++++++++-- src/liblzma/common/index_hash.c | 13 ++++++++++--- src/liblzma/common/lzip_decoder.c | 6 +++++- src/liblzma/delta/delta_decoder.c | 7 ++++++- src/liblzma/delta/delta_encoder.c | 12 ++++++++++-- src/liblzma/simple/simple_coder.c | 6 ++++-- 10 files changed, 77 insertions(+), 23 deletions(-) commit f6dce49cb656b358e2fb2a3032e35e20af34dc84 Author: Jia Tan Date: 2023-02-07 00:00:44 +0800 liblzma: Adjust container.h for consistency with filter.h. src/liblzma/api/lzma/container.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) commit 173d240bb4763aedc8c01df4d9c83e311d954c33 Author: Jia Tan Date: 2023-02-07 00:00:09 +0800 liblzma: Fix small typos and reword a few things in filter.h. src/liblzma/api/lzma/container.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) commit 17797bacde3f7264048ef0976c137a600148d6cf Author: Jia Tan Date: 2023-02-06 23:42:08 +0800 liblzma: Convert list of flags in lzma_mt to bulleted list. 
src/liblzma/api/lzma/container.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) commit 37da0e727161b401b8bfd8dccf163a4b2fc0460b Author: Jia Tan Date: 2023-01-26 23:17:41 +0800 liblzma: Fix typo in documentation in container.h lzma_microlzma_decoder -> lzma_microlzma_encoder src/liblzma/api/lzma/container.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit b8331077c626e6bf14f4671d09b561146eaf816a Author: Jia Tan Date: 2023-01-26 23:16:34 +0800 liblzma: Improve documentation for container.h Standardizing each function to always specify parameters and return values. Also moved the parameters and return values to the end of each function description. src/liblzma/api/lzma/container.h | 146 +++++++++++++++++++++++++-------------- 1 file changed, 93 insertions(+), 53 deletions(-) commit b9a3511bb61d3b6ce49abd33dce6155a573f5272 Author: Jia Tan Date: 2023-02-22 20:59:41 +0800 CMake: Add LZIP decoder test to list of tests. CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) commit cd82ef2fb48b174c57cd03b84a9a0f978772cb89 Author: Lasse Collin Date: 2023-02-17 20:56:49 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 076e911ba25cadf2cbdfbd0f65991a002e210c0b Author: Lasse Collin Date: 2023-02-17 20:48:28 +0200 Build: Use only the generic symbol versioning on MicroBlaze. On MicroBlaze, GCC 12 is broken in sense that __has_attribute(__symver__) returns true but it still doesn't support the __symver__ attribute even though the platform is ELF and symbol versioning is supported if using the traditional __asm__(".symver ...") method. Avoiding the traditional method is good because it breaks LTO (-flto) builds with GCC. See also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101766 For now the only extra symbols in liblzma_linux.map are the compatibility symbols with the patch that spread from RHEL/CentOS 7. These require the use of __symver__ attribute or __asm__(".symver ...") in the C code. 
Compatibility with the patch from CentOS 7 doesn't seem valuable on MicroBlaze so use liblzma_generic.map on MicroBlaze instead. It doesn't require anything special in the C code and thus no LTO issues either. An alternative would be to detect support for __symver__ attribute in configure.ac and CMakeLists.txt and fall back to __asm__(".symver ...") but then LTO would be silently broken on MicroBlaze. It sounds likely that MicroBlaze is a special case so let's treat it as such because that is simpler. If a similar issue exists on some other platform too then hopefully someone will report it and this can be reconsidered. (This doesn't do the same fix in CMakeLists.txt. Perhaps it should but perhaps CMake build of liblzma doesn't matter much on MicroBlaze. The problem breaks the build so it's easy to notice and can be fixed later.) Thanks to Vincent Fazio for reporting the problem and proposing a patch (in the end that solution wasn't used): https://github.com/tukaani-project/xz/pull/32 configure.ac | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) commit bc34e5ac9996667d2c1ec9a7895ec5931ac4caea Author: Lasse Collin Date: 2023-02-16 21:09:00 +0200 liblzma: Very minor API doc tweaks. Use "member" to refer to struct members as that's the term used by the C standard. Use lzma_options_delta.dist and such in docs so that in Doxygen's HTML output they will link to the doc of the struct member. Clean up a few trailing white spaces too. src/liblzma/api/lzma/block.h | 6 +++--- src/liblzma/api/lzma/delta.h | 6 +++--- src/liblzma/api/lzma/index.h | 10 +++++----- src/liblzma/api/lzma/stream_flags.h | 6 +++--- 4 files changed, 14 insertions(+), 14 deletions(-) commit d31fbd28be5b79eb682db50004b0fb7aad5299ec Author: Jia Tan Date: 2023-02-17 00:54:33 +0800 liblzma: Adjust spacing in doc headers in bcj.h.
src/liblzma/api/lzma/bcj.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) commit 701e9be6bede19771895f756082db0c017f86f07 Author: Jia Tan Date: 2023-02-17 00:44:44 +0800 liblzma: Adjust documentation in bcj.h for consistent style. src/liblzma/api/lzma/bcj.h | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) commit 762c4d0b62d2694cf3a01e030fdfe68e09e7b503 Author: Jia Tan Date: 2023-02-17 00:36:05 +0800 liblzma: Rename field => member in documentation. Also adjusted preset value => preset level. src/liblzma/api/lzma/base.h | 18 +++++++-------- src/liblzma/api/lzma/block.h | 44 ++++++++++++++++++------------------- src/liblzma/api/lzma/container.h | 26 +++++++++++----------- src/liblzma/api/lzma/delta.h | 12 +++++----- src/liblzma/api/lzma/index.h | 30 ++++++++++++------------- src/liblzma/api/lzma/lzma12.h | 28 +++++++++++------------ src/liblzma/api/lzma/stream_flags.h | 32 +++++++++++++-------------- 7 files changed, 95 insertions(+), 95 deletions(-) commit 0ce1db0223854d94b4a0d17737ac0486a75d9e6a Author: Lasse Collin Date: 2023-02-16 17:59:50 +0200 liblzma: Silence a warning from MSVC. It gives C4146 here since unary minus with unsigned integer is still unsigned (which is the intention here). Doing it with subtraction makes it clearer and avoids the warning. Thanks to Nathan Moinvaziri for reporting this. src/liblzma/check/crc64_fast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit d83da006b3e8dbad9112e6cde6b27049d193c9ec Author: Jia Tan Date: 2023-02-16 21:04:54 +0800 liblzma: Improve documentation for stream_flags.h Standardizing each function to always specify parameters and return values. Also moved the parameters and return values to the end of each function description. A few small things were reworded and long sentences broken up.
src/liblzma/api/lzma/stream_flags.h | 76 ++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 30 deletions(-) commit 2796bb4736f645d34939b5d62d5958f534e41e69 Author: Jia Tan Date: 2023-02-14 21:50:16 +0800 liblzma: Improve documentation in lzma12.h. All functions now explicitly specify parameter and return values. src/liblzma/api/lzma/lzma12.h | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) commit ebebaa8d9381afea440eb1b01917117551adf68f Author: Jia Tan Date: 2023-01-27 22:44:06 +0800 liblzma: Improve documentation in check.h. All functions now explicitly specify parameter and return values. Also moved the note about SHA-256 functions not being exported to the top of the file. src/liblzma/api/lzma/check.h | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) commit 765fa2865aab97ea713c1715922e322b3cf599a7 Author: Jia Tan Date: 2023-02-08 21:33:52 +0800 liblzma: Improve documentation in index.h All functions now explicitly specify parameter and return values. src/liblzma/api/lzma/index.h | 177 ++++++++++++++++++++++++++++++------------- 1 file changed, 126 insertions(+), 51 deletions(-) commit 918e208af5960728b6def01e692b395f7d8e3701 Author: Jia Tan Date: 2023-02-08 20:35:32 +0800 liblzma: Reword a comment in index.h. src/liblzma/api/lzma/index.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 1f157d214bc09338876d2467b549935679abc0bf Author: Jia Tan Date: 2023-02-08 20:30:23 +0800 liblzma: Omit lzma_index_iter's internal field from Doxygen docs. Add \private above this field and its sub-fields since it is not meant to be modified by users. src/liblzma/api/lzma/index.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) commit 28757fa46d8e0f0a9c17628b2b6af9bcb3cd96fc Author: Jia Tan Date: 2023-01-21 21:32:03 +0800 liblzma: Fix documentation for LZMA_MEMLIMIT_ERROR. 
LZMA_MEMLIMIT_ERROR was missing the "<" character needed to put documentation after a member. src/liblzma/api/lzma/base.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 135d5a1a65a9e199b9a5550c1f788cf43cd81018 Author: Jia Tan Date: 2023-01-21 00:29:38 +0800 liblzma: Improve documentation for base.h. Standardizing each function to always specify params and return values. Also fixed a small grammar mistake. src/liblzma/api/lzma/base.h | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) commit 2287d5668384e289d51e72724997dc920483768e Author: Jia Tan Date: 2023-02-14 00:08:33 +0800 liblzma: Minor improvements to vli.h. Added [out] annotations to parameters that are pointers and can have their value changed. Also added a clarification to lzma_vli_is_valid. src/liblzma/api/lzma/vli.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) commit 7124b8a16ae60cb2e93218ff531868eebd673bde Author: Jia Tan Date: 2023-02-10 21:38:02 +0800 liblzma: Add comments for macros in delta.h. Document LZMA_DELTA_DIST_MIN and LZMA_DELTA_DIST_MAX for completeness and to avoid Doxygen warnings. src/liblzma/api/lzma/delta.h | 8 ++++++++ 1 file changed, 8 insertions(+) commit 59c7bb8931159fdb1a31bbbeaed0e6984e2d3c81 Author: Jia Tan Date: 2023-02-10 21:35:23 +0800 liblzma: Improve documentation in index_hash.h. All functions now explicitly specify parameter and return values. Also reworded the description of lzma_index_hash_init() for readability. src/liblzma/api/lzma/index_hash.h | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) commit e970c28ac3cb2e8051925f81db2fe953664c2645 Author: Jia Tan Date: 2023-02-03 00:33:32 +0800 liblzma: Fix bug in lzma_str_from_filters() not checking filters[] length. The bug is only a problem in applications that do not properly terminate the filters[] array with LZMA_VLI_UNKNOWN or have more than LZMA_FILTERS_MAX filters. This bug does not affect xz. 
src/liblzma/common/string_conversion.c | 7 +++++++ 1 file changed, 7 insertions(+) commit 85e01266a96689448abb249da6c6abc3afcd4a4e Author: Jia Tan Date: 2023-02-03 00:32:47 +0800 Tests: Create test_filter_str.c. Tests lzma_str_to_filters(), lzma_str_from_filters(), and lzma_str_list_filters() API functions. CMakeLists.txt | 1 + tests/Makefile.am | 2 + tests/test_filter_str.c | 593 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 596 insertions(+) commit 3fa0f3ba12dd3383f62dbfa18a864f9b4217fa0a Author: Jia Tan Date: 2023-01-22 08:49:00 +0800 liblzma: Fix typos in comments in string_conversion.c. src/liblzma/common/string_conversion.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 32dbe045d74e94f75c53236fa2a6c0454d7b6d9e Author: Jia Tan Date: 2023-02-03 00:20:20 +0800 liblzma: Clarify block encoder and decoder documentation. Added a few sentences to the description for lzma_block_encoder() and lzma_block_decoder() to highlight that the Block Header must be coded before calling these functions. src/liblzma/api/lzma/block.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) commit ccf12acbfa7331b1bbb99ec55879186eb35f879f Author: Jia Tan Date: 2023-02-03 00:12:24 +0800 Update lzma_block documentation for lzma_block_uncomp_encode(). src/liblzma/api/lzma/block.h | 3 +++ 1 file changed, 3 insertions(+) commit 6a0b168dd9dc1ef437255c4bfdc6eff8b96fdb24 Author: Jia Tan Date: 2023-02-03 00:11:37 +0800 liblzma: Minor edits to lzma_block header_size documentation. src/liblzma/api/lzma/block.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 84ce36f90e68471fec1f0e61cd93ac7ed9ab4883 Author: Jia Tan Date: 2023-02-03 00:11:07 +0800 liblzma: Enumerate functions that read version in lzma_block. src/liblzma/api/lzma/block.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) commit d6620774682830d606f57109861b6763805b3492 Author: Jia Tan Date: 2023-02-03 00:10:34 +0800 liblzma: Clarify comment in block.h. 
src/liblzma/api/lzma/block.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 880adb5aa25f66a53d81c2f3acc716f7a4d373d1 Author: Jia Tan Date: 2023-02-03 00:07:23 +0800 liblzma: Improve documentation for block.h. Standardizing each function to always specify params and return values. Output pointer parameters are also marked with doxygen style [out] to make it clear. Any note sections were also moved above the parameter and return sections for consistency. src/liblzma/api/lzma/block.h | 96 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 75 insertions(+), 21 deletions(-) commit b5b1b1f061c342271e4977ce5cba604a19c0ca52 Author: Jia Tan Date: 2023-02-01 23:38:30 +0800 liblzma: Clarify a comment about LZMA_STR_NO_VALIDATION. The flag description for LZMA_STR_NO_VALIDATION was previously confusing about the treatment for filters that cannot be used with .xz format (lzma1) without using LZMA_STR_ALL_FILTERS. Now, it is clear that LZMA_STR_NO_VALIDATION is not a super set of LZMA_STR_ALL_FILTERS. src/liblzma/api/lzma/filter.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) commit e904e778b82b14f2779aab80d6c8f3c01a3fc54b Author: Jia Tan Date: 2023-01-27 20:14:51 +0800 Translations: Add Brazilian Portuguese translation of man pages. Thanks to Rafael Fontenelle. po4a/po4a.conf | 2 +- po4a/pt_BR.po | 3677 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 3678 insertions(+), 1 deletion(-) commit e9c47e79c9584ba1126f5fa0dbe6c96e67411aa6 Author: Jia Tan Date: 2023-01-24 20:48:50 +0800 liblzma: Fix documentation in filter.h for lzma_str_to_filters() The previous documentation for lzma_str_to_filters() was technically correct, but misleading. lzma_str_to_filters() returns NULL on success, which is in practice always defined to 0. This is the same value as LZMA_OK, but lzma_str_to_filters() does not return lzma_ret so we should be more clear.
src/liblzma/api/lzma/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 99575947a58a60416c570eb78038d18a1ea4cef4 Author: Jia Tan Date: 2023-01-07 21:55:06 +0800 xz: Refactor duplicated check for custom suffix when using --format=raw src/xz/args.c | 8 ++++++++ src/xz/suffix.c | 26 ++++++++------------------ src/xz/suffix.h | 8 ++++++++ 3 files changed, 24 insertions(+), 18 deletions(-) commit 76dec92fcca4a9ccd2063ed6d5d9e3474665baad Author: Jia Tan Date: 2023-01-20 21:53:14 +0800 liblzma: Set documentation on all reserved fields to private. This prevents the reserved fields from being part of the generated Doxygen documentation. src/liblzma/api/lzma/base.h | 17 +++++++++++++++ src/liblzma/api/lzma/block.h | 43 +++++++++++++++++++++++++++++++++++++ src/liblzma/api/lzma/container.h | 24 +++++++++++++++++++++ src/liblzma/api/lzma/delta.h | 12 +++++++++++ src/liblzma/api/lzma/index.h | 27 +++++++++++++++++++++++ src/liblzma/api/lzma/lzma12.h | 22 +++++++++++++++++++ src/liblzma/api/lzma/stream_flags.h | 28 ++++++++++++++++++++++++ 7 files changed, 173 insertions(+) commit bd213d06ebf92cf42eeb97e7c578bddc857f8ea8 Author: Jia Tan Date: 2022-12-21 23:59:43 +0800 liblzma: Highlight liblzma API headers should not be included directly. This improves the generated Doxygen HTML files to better highlight how to properly use the liblzma API header files. 
src/liblzma/api/lzma/base.h | 5 +++-- src/liblzma/api/lzma/bcj.h | 5 +++-- src/liblzma/api/lzma/block.h | 5 +++-- src/liblzma/api/lzma/check.h | 5 +++-- src/liblzma/api/lzma/container.h | 5 +++-- src/liblzma/api/lzma/delta.h | 5 +++-- src/liblzma/api/lzma/filter.h | 5 +++-- src/liblzma/api/lzma/hardware.h | 5 +++-- src/liblzma/api/lzma/index.h | 5 +++-- src/liblzma/api/lzma/index_hash.h | 5 +++-- src/liblzma/api/lzma/lzma12.h | 5 +++-- src/liblzma/api/lzma/stream_flags.h | 5 +++-- src/liblzma/api/lzma/version.h | 5 +++-- src/liblzma/api/lzma/vli.h | 5 +++-- 14 files changed, 42 insertions(+), 28 deletions(-) commit 257dbff0ba1a7bc45a74d203ece015c9f70c88cd Author: Jia Tan Date: 2023-01-19 20:35:09 +0800 tuklib_physmem: Silence warning from -Wcast-function-type on MinGW-w64. tuklib_physmem depends on GetProcAddress() for both MSVC and MinGW-w64 to retrieve a function address. The proper way to do this is to cast the return value to the type of function pointer retrieved. Unfortunately, this causes a cast-function-type warning, so the best solution is to simply ignore the warning. src/common/tuklib_physmem.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) commit 720ad4a44282a7ee59aa9920eaf495d85d245d97 Author: Jia Tan Date: 2023-01-16 21:35:45 +0800 xz: Add missing comment for coder_set_compression_settings() src/xz/coder.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 88dc19163421282369c0989e997c05f9a447edc6 Author: Jia Tan Date: 2023-01-16 20:55:10 +0800 xz: Do not set compression settings with raw format in list mode. Calling coder_set_compression_settings() in list mode with verbose mode on caused the filter chain and memory requirements to print. This was unnecessary since the command results in an error and not consistent with other formats like lzma and alone. 
src/xz/args.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 039e0ab13efb144642f9d99eeeb9c668e76bb430 Author: Jia Tan Date: 2023-01-13 20:37:06 +0800 Translations: Update the Brazilian Portuguese translation. po/pt_BR.po | 603 ++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 344 insertions(+), 259 deletions(-) commit 718f7a60e740b26b8353965edaaddc7d4deb4d85 Author: Lasse Collin Date: 2023-01-12 13:04:05 +0200 Build: Omit -Wmissing-noreturn from the default warnings. It's not that important. It can be annoying in builds that disable many features since in those cases the tests programs will correctly trigger this warning with Clang. configure.ac | 1 - 1 file changed, 1 deletion(-) commit 3ccedb09724c998c39d708c945f6da5852c39e13 Author: Lasse Collin Date: 2023-01-12 06:05:58 +0200 xz: Use ssize_t for the to-be-ignored return value from write(fd, ptr, 1). It makes no difference here as the return value fits into an int too and it then gets ignored but this looks better. src/xz/file_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 09fbd2f0527def89e839c8907de4fe7ef6bc5019 Author: Lasse Collin Date: 2023-01-12 06:01:12 +0200 xz: Silence warnings from -Wsign-conversion in a 32-bit build. src/common/tuklib_mbstr_fw.c | 2 +- src/xz/list.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) commit 683d3f178ef1487b5418be49f331b0131a101b40 Author: Lasse Collin Date: 2023-01-12 05:38:48 +0200 liblzma: Silence another warning from -Wsign-conversion in a 32-bit build. It doesn't warn on a 64-bit system because truncating a ptrdiff_t (signed long) to uint32_t is diagnosed under -Wconversion by GCC and -Wshorten-64-to-32 by Clang. src/liblzma/lz/lz_encoder_mf.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) commit 2b8062ef94a38d0f9ad0d1b309e8748db52f5c15 Author: Lasse Collin Date: 2023-01-12 04:46:45 +0200 liblzma: Silence a warning from -Wsign-conversion in a 32-bit build. 
src/common/mythread.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit b16b9c0d22227012518595c2222facc73bd833a8 Author: Lasse Collin Date: 2023-01-12 04:17:24 +0200 Build: Make configure add more warning flags for GCC and Clang. -Wstrict-aliasing was removed from the list since it is enabled by -Wall already. A normal build is clean with these on GNU/Linux x86-64 with GCC 12.2.0 and Clang 14.0.6. configure.ac | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) commit c47ecd6d3909d0d3ff48dfd6e2ee41e7c7130b94 Author: Lasse Collin Date: 2023-01-12 04:14:18 +0200 Tests: Fix warnings from clang -Wassign-enum. Explicitly casting the integer to lzma_check silences the warning. Since such an invalid value is needed in multiple tests, a constant INVALID_LZMA_CHECK_ID was added to tests.h. The use of 0x1000 for lzma_block.check wasn't optimal as if the underlying type is a char then 0x1000 will be truncated to 0. However, in these test cases the value is ignored, thus even with such truncation the test would have passed. tests/test_block_header.c | 6 +++--- tests/test_check.c | 2 +- tests/test_stream_flags.c | 8 ++++---- tests/tests.h | 9 +++++++++ 4 files changed, 17 insertions(+), 8 deletions(-) commit 34e13ce015232968731de2a9ec3440a08b0084b1 Author: Lasse Collin Date: 2023-01-12 03:51:07 +0200 Tests: Silence warnings from -Wsign-conversion. Note that assigning an unsigned int to lzma_check doesn't warn on GNU/Linux x86-64 since the enum type is unsigned on that platform. The enum can be signed on some other platform though so it's best to use enumeration type lzma_check in these situations. tests/test_check.c | 6 +++--- tests/test_stream_flags.c | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) commit 6671d0fe46b77f0fafce860836b7a12dc3cda14a Author: Lasse Collin Date: 2023-01-12 03:19:59 +0200 liblzma: Silence warnings from clang -Wconditional-uninitialized.
This is similar to 2ce4f36f179a81d0c6e182a409f363df759d1ad0. The actual initialization of the variables is done inside mythread_sync() macro. Clang doesn't seem to see that the initialization code inside the macro is always executed. src/liblzma/common/stream_decoder_mt.c | 8 +++++--- src/liblzma/common/stream_encoder_mt.c | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) commit d3e833ca1d2abda54648494c33aca73a40a47efe Author: Lasse Collin Date: 2023-01-12 03:11:40 +0200 Fix warnings from clang -Wdocumentation. src/liblzma/check/check.h | 4 ---- src/liblzma/lz/lz_encoder_mf.c | 4 ++-- src/xz/options.c | 4 ++-- 3 files changed, 4 insertions(+), 8 deletions(-) commit 977dd2e26bc50efec8d30fb90380394042d24558 Author: Lasse Collin Date: 2023-01-12 03:04:28 +0200 Tests: test_lzip_decoder: Remove trailing white-space. tests/test_lzip_decoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit c55157ed7437ab14f2afb7fecf331e321f2edf9a Author: Lasse Collin Date: 2023-01-12 03:03:55 +0200 Tests: test_lzip_decoder: Silence warnings from -Wsign-conversion. tests/test_lzip_decoder.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) commit 18b845e69752c975dfeda418ec00eda22605c2ee Author: Lasse Collin Date: 2023-01-11 18:52:54 +0200 Bump version and soname for 5.4.1. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 4080bbb844fb36701ffb978f0c41ea2c2c9f8960 Author: Jia Tan Date: 2023-01-11 23:58:16 +0800 Add NEWS for 5.4.1. NEWS | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) commit 674c89fdb8c457ebc3a0652e235d8b6cd7b7dee2 Author: Lasse Collin Date: 2023-01-10 11:56:11 +0200 sysdefs.h: Don't include strings.h anymore. On some platforms src/xz/suffix.c may need <strings.h> for strcasecmp() but suffix.c includes the header when it needs it.
Unless there is an old system that otherwise supports enough C99 to build XZ Utils but doesn't have C89/C90-compatible <string.h>, there should be no need to include <strings.h> in sysdefs.h. src/common/sysdefs.h | 6 ------ 1 file changed, 6 deletions(-) commit 2a6b938084fac9ddb39cd69c9beeed15c3b6f6f6 Author: Lasse Collin Date: 2023-01-10 11:23:41 +0200 xz: Include <strings.h> in suffix.c if needed for strcasecmp(). SUSv2 and POSIX.1‐2017 declare only a few functions in <strings.h>. Of these, strcasecmp() is used on some platforms in suffix.c. Nothing else in the project needs <strings.h> (at least if building on a modern system). sysdefs.h currently includes <strings.h> if HAVE_STRINGS_H is defined and suffix.c relied on this. Note that dos/config.h doesn't #define HAVE_STRINGS_H even though DJGPP does have strings.h. It isn't needed with DJGPP as strcasecmp() is also in <string.h> in DJGPP. src/xz/suffix.c | 3 +++ 1 file changed, 3 insertions(+) commit aea639e81beb548e3114c74b6d9a894d6e036189 Author: Jia Tan Date: 2023-01-11 22:46:48 +0800 xz: Fix warning -Wformat-nonliteral on clang in message.c. clang and gcc differ in how they handle -Wformat-nonliteral. gcc will allow a non-literal format string as long as the function takes its format arguments as a va_list. src/xz/message.c | 9 +++++++++ 1 file changed, 9 insertions(+) commit e3b42bfcb0f67988beee7c7022fed0361282be45 Author: Jia Tan Date: 2023-01-11 20:58:31 +0800 Tests: Fix test_filter_flags copy/paste error. tests/test_filter_flags.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 21625b7e11d004788e40eb5eb88d9d89f65fe347 Author: Jia Tan Date: 2023-01-11 20:42:29 +0800 Tests: Fix type-limits warning in test_filter_flags. This only occurs in test_filter_flags when the BCJ filters are not configured and built. In this case, ARRAY_SIZE() returns 0 and causes a type-limits warning with the loop variable since an unsigned number will always be >= 0.
tests/test_filter_flags.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) commit c337983e928682d56ce3470b286a8d5b8646e0ad Author: Lasse Collin Date: 2023-01-10 22:14:03 +0200 liblzma: CLMUL CRC64: Work around a bug in MSVC, second attempt. This affects only 32-bit x86 builds. x86-64 is OK as is. I still cannot easily test this myself. The reporter has tested this and it passes the tests included in the CMake build and performance is good: raw CRC64 is 2-3 times faster than the C version of the slice-by-four method. (Note that liblzma doesn't include a MSVC-compatible version of the 32-bit x86 assembly code for the slice-by-four method.) Thanks to Iouri Kharon for figuring out a fix, testing, and benchmarking. src/liblzma/check/crc64_fast.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) commit b7fb438ea0e3ee02e3a164f3b72fae456cbe34d7 Author: Jia Tan Date: 2023-01-11 01:18:50 +0800 Tests: Fix unused function warning in test_block_header. One of the global arrays of filters was only used in a test that required both encoders and decoders to be configured in the build. tests/test_block_header.c | 4 ++++ 1 file changed, 4 insertions(+) commit 68e9ef036d18d7c3952bff0b391d5989b86934da Author: Jia Tan Date: 2023-01-11 01:08:03 +0800 Tests: Fix unused function warning in test_index_hash. test_index_hash does not use fill_index_hash() unless both encoders and decoders are configured in the build. tests/test_index_hash.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) commit a387707cd8cdefbffb5b7429bda4b7fcc192954a Author: Lasse Collin Date: 2023-01-10 00:33:14 +0200 Windows: Update INSTALL-MSVC.txt to recommend CMake over project files. windows/INSTALL-MSVC.txt | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) commit 52902ad69518255a14b0144f0a2379e06fde5b6e Author: Lasse Collin Date: 2023-01-10 12:47:16 +0200 Revert "liblzma: CLMUL CRC64: Workaround a bug in MSVC (VS2015-2022)." 
This reverts commit 36edc65ab4cf10a131f239acbd423b4510ba52d5. It was reported that it wasn't a good enough fix and MSVC still produced (different kind of) bad code when building for 32-bit x86 if optimizations are enabled. Thanks to Iouri Kharon. src/liblzma/check/crc64_fast.c | 6 ------ 1 file changed, 6 deletions(-) commit e81b9fc48ca70f9228308d3f1871cd81f9a5a496 Author: Lasse Collin Date: 2023-01-10 10:05:13 +0200 sysdefs.h: Fix a comment. src/common/sysdefs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 6e89ab58b031aa046308a0b3504ff0a5be042571 Author: Lasse Collin Date: 2023-01-10 10:04:06 +0200 sysdefs.h: Don't include memory.h anymore even if it were available. It quite probably was never needed, that is, any system where memory.h was required likely couldn't compile XZ Utils for other reasons anyway. XZ Utils 5.2.6 and later source packages were generated using Autoconf 2.71 which no longer defines HAVE_MEMORY_H. So the code being removed is no longer used anyway. src/common/sysdefs.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) commit 65c59ad429aa59f9df0326d9fc82931ba4a9d123 Author: Lasse Collin Date: 2023-01-10 08:50:26 +0200 CMake/Windows: Add a workaround for windres from GNU binutils. This is combined from the following commits in the master branch: 443dfebced041adc88f10d824188eeef5b5821a9 6b117d3b1fe91eb26d533ab16a2e552f84148d47 5e34774c31d1b7509b5cb77a3be9973adec59ea0 Thanks to Iouri Kharon for the bug report, the original patch, and testing. CMakeLists.txt | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) commit 43521e77acc907863fa4f94aae276366172cb9ee Author: Lasse Collin Date: 2023-01-06 22:53:38 +0200 Tests: test_filter_flags: Clean up minor issues. Here are the list of the most significant issues addressed: - Avoid using internal common.h header. It's not good to copy the constants like this but common.h cannot be included for use outside of liblzma. 
This is the quickest thing to do that could be fixed later. - Omit the INIT_FILTER macro. Initialization should be done with just regular designated initializers. - Use start_offset = 257 for BCJ tests. It demonstrates that Filter Flags encoder and decoder don't validate the options thoroughly. 257 is valid only for the x86 filter. This is a bit silly but not a significant problem in practice because the encoder and decoder initialization functions will catch bad alignment still. Perhaps this should be fixed but it's not urgent and doesn't need to be in 5.4.x. - Various tweaks to comments such as filter id -> Filter ID tests/test_filter_flags.c | 153 +++++++++++++++++++++++----------------------- 1 file changed, 78 insertions(+), 75 deletions(-) commit 6b44cead95d767414272dc3a67898a36bfdf95b3 Author: Jia Tan Date: 2022-12-29 23:33:33 +0800 Tests: Refactors existing filter flags tests. Converts the existing filter flags tests into tuktests. tests/test_filter_flags.c | 655 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 457 insertions(+), 198 deletions(-) commit 1bbefa9659b202ba31bd244a9d0e4f0d37ff3ed7 Author: Lasse Collin Date: 2023-01-08 00:32:29 +0200 Tests: tuktest.h: Support tuktest_malloc(0). It's not needed in XZ Utils at least for now. It's good to support it still because if such use is needed later, it wouldn't be caught on GNU/Linux since malloc(0) from glibc returns non-NULL. tests/tuktest.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit ce3a3fbc7c2c399aeed644d54f3bd56ac914dfee Author: Lasse Collin Date: 2023-01-07 21:57:11 +0200 CMake: Update cmake_minimum_required from 3.13...3.16 to 3.13...3.25. The changes listed on cmake-policies(7) for versions 3.17 to 3.25 shouldn't affect this project. CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 99fcd57f2ea35eaa94e09f674d5364329c880fa2 Author: Lasse Collin Date: 2023-01-08 00:24:23 +0200 Update THANKS. 
THANKS | 1 + 1 file changed, 1 insertion(+) commit c0c13d9d82eb8a4302c8bbb8b4c5178d285fe9ab Author: Lasse Collin Date: 2023-01-07 19:50:35 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 3d45987451b1c3bb42697b29341824c0e5484cba Author: Lasse Collin Date: 2023-01-09 11:27:24 +0200 CMake: Fix a copypaste error in xzdec Windows resource file handling. It was my mistake. Thanks to Iouri Kharon for the bug report. CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 706bce5018d7cf83094e13454a0731169ec119b5 Author: Lasse Collin Date: 2023-01-07 19:50:03 +0200 CMake/Windows: Add resource files to xz.exe and xzdec.exe. The command line tools cannot be built with MSVC for now but they can be built with MinGW-w64. Thanks to Iouri Kharon for the bug report and the original patch. CMakeLists.txt | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) commit e96dee55df04113c33b387ccdb6cb70935422d91 Author: Lasse Collin Date: 2023-01-09 12:22:05 +0200 liblzma: CLMUL CRC64: Workaround a bug in MSVC (VS2015-2022). I haven't tested with MSVC myself and there doesn't seem to be information about the problem online, so I'm relying on the bug report. Thanks to Iouri Kharon for the bug report and the patch. src/liblzma/check/crc64_fast.c | 6 ++++++ 1 file changed, 6 insertions(+) commit 52bc1ee34dda9bb6fb40175e5952863066681b77 Author: Lasse Collin Date: 2023-01-07 19:31:15 +0200 Build: Require that _mm_set_epi64x() is usable to enable CLMUL support. VS2013 doesn't have _mm_set_epi64x() so this way CLMUL gets disabled with VS2013. Thanks to Iouri Kharon for the bug report. CMakeLists.txt | 3 ++- configure.ac | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) commit bad44cfe19e8be8ae76755369be2a34abcd2b4fa Author: Jia Tan Date: 2022-12-29 00:25:18 +0800 Tests: Creates test_index_hash.c Tests all API functions exported from index_hash.h. Does not have a dedicated test for lzma_index_hash_end. 
[Minor edits were made by Lasse Collin.] .gitignore | 1 + CMakeLists.txt | 2 + tests/Makefile.am | 3 + tests/test_index_hash.c | 388 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 394 insertions(+) commit 692ccdf5516dfe55fb6e9c5cdfb31f4c02c1ecd1 Author: Jia Tan Date: 2023-01-05 20:57:25 +0800 liblzma: Remove common.h include from common/index.h. common/index.h is needed by liblzma internally and tests. common.h will include and define many things that are not needed by the tests. Also, this prevents include order problems because both common.h and lzma.h define LZMA_API. On most platforms it results only in a warning but on Windows it would break the build as the definition in common.h must be used only for building liblzma itself. src/liblzma/common/index.c | 1 + src/liblzma/common/index.h | 9 +++++++-- src/liblzma/common/index_decoder.h | 1 + src/liblzma/common/stream_buffer_encoder.c | 1 + 4 files changed, 10 insertions(+), 2 deletions(-) commit 2ac7bafc8f07c1edefe96a4a7a040ddfff0eb5bb Author: Jia Tan Date: 2022-08-17 20:20:16 +0800 liblzma: Add NULL check to lzma_index_hash_append. This is for consistency with lzma_index_append. src/liblzma/common/index_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit db714d30e0c74d1dd4af1a23ed62b44e0e8e4efc Author: Jia Tan Date: 2022-08-17 17:59:51 +0800 liblzma: Replaced hardcoded 0x0 index indicator byte with macro src/liblzma/common/index.h | 3 +++ src/liblzma/common/index_decoder.c | 2 +- src/liblzma/common/index_encoder.c | 2 +- src/liblzma/common/index_hash.c | 2 +- src/liblzma/common/stream_decoder.c | 3 ++- src/liblzma/common/stream_decoder_mt.c | 2 +- 6 files changed, 9 insertions(+), 5 deletions(-) commit 39d2585dcd3e827cfc3c46025ab6708c4aeb36c6 Author: Jia Tan Date: 2023-01-06 20:43:31 +0800 Style: Change #if !defined() to #ifndef in mythread.h. 
src/common/mythread.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 3f0130aa288e4ed57ace609517db9700a41223af Author: Jia Tan Date: 2023-01-06 20:35:55 +0800 Build: Add missing stream_decoder_mt.c to .vcxproj files. The line in the .vcxproj files for building stream_decoder_mt.c was missing in 5.4.0. Thanks to Hajin Jang for reporting the issue. windows/vs2013/liblzma.vcxproj | 1 + windows/vs2013/liblzma_dll.vcxproj | 1 + windows/vs2017/liblzma.vcxproj | 1 + windows/vs2017/liblzma_dll.vcxproj | 1 + windows/vs2019/liblzma.vcxproj | 1 + windows/vs2019/liblzma_dll.vcxproj | 1 + 6 files changed, 6 insertions(+) commit f204d1050a515d17851eed9049862ce5a9c729c1 Author: Lasse Collin Date: 2023-01-04 22:40:54 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 34a9c2d650d6c30bd88e1b21910dd863209aa884 Author: Lasse Collin Date: 2023-01-04 18:40:28 +0200 Tests: Adjust style in test_compress.sh. tests/test_compress.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) commit 761c208d58e0c3daa0f46e68b406adfc318d2a46 Author: Jia Tan Date: 2023-01-04 23:58:58 +0800 Tests: Replace non portable shell parameter expansion The shell parameter expansion using # and ## is not supported in Solaris 10 Bourne shell (/bin/sh). Even though this is POSIX, it is not fully portable, so we should avoid it. tests/create_compress_files.c | 2 +- tests/test_compress.sh | 20 +++++++++++++------- tests/test_compress_prepared_bcj_sparc | 2 +- tests/test_compress_prepared_bcj_x86 | 2 +- 4 files changed, 16 insertions(+), 10 deletions(-) commit 8a7cbc074547e55e57f4f3696f69bedeb05e14c4 Author: Jia Tan Date: 2023-01-03 21:02:38 +0800 Translations: Add Korean translation of man pages.
Thanks to Seong-ho Cho po4a/ko.po | 5552 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ po4a/po4a.conf | 2 +- 2 files changed, 5553 insertions(+), 1 deletion(-) commit ca2af49bb8be5995eb0e6a3abf457622626d49a7 Author: Jia Tan Date: 2023-01-03 20:47:27 +0800 Translations: Update the Esperanto translation. po/eo.po | 620 ++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 332 insertions(+), 288 deletions(-) commit bfba3394aed03311fe9a746d3141b2e16d8b9325 Author: Lasse Collin Date: 2023-01-02 17:05:07 +0200 Build: Fix config.h comments. configure.ac | 2 +- m4/tuklib_progname.m4 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 507648ad114c2ae0cd6d181063e1ac07e8106718 Author: Jia Tan Date: 2023-01-02 22:33:48 +0800 Build: Only define HAVE_PROGRAM_INVOCATION_NAME if it is set to 1. HAVE_DECL_PROGRAM_INVOCATION_NAME is renamed to HAVE_PROGRAM_INVOCATION_NAME. Previously, HAVE_DECL_PROGRAM_INVOCATION_NAME was always set when building with autotools. CMake would only set this when it was 1, and the dos/config.h did not define it. The new macro definition is consistent across build systems. cmake/tuklib_progname.cmake | 5 ++--- m4/tuklib_progname.m4 | 5 ++++- src/common/tuklib_progname.c | 2 +- src/common/tuklib_progname.h | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) commit ab5229d32adfec1f3fbc95228d9dd6f560732ab5 Author: Lasse Collin Date: 2022-12-30 20:10:08 +0200 Tests: test_check: Test corner cases of CLMUL CRC64. tests/test_check.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) commit 8791826f31733fda0a13b411c2ed930faaeb25aa Author: Lasse Collin Date: 2022-12-30 19:36:49 +0200 Tests: Clarify a comment in test_lzip_decoder.c. tests/test_lzip_decoder.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) commit c410d812ea12bfc74f6b727c1a799478c79f19ca Author: Jia Tan Date: 2022-12-29 01:55:19 +0800 xz: Includes <time.h> and <sys/time.h> conditionally in mytime.c.
Previously, mytime.c depended on mythread.h for <time.h> to be included. src/xz/mytime.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) commit 501c6013d4a59fae5d4368e9657c4885493db809 Author: Jia Tan Date: 2022-12-29 01:15:27 +0800 liblzma: Includes sys/time.h conditionally in mythread. Previously, <sys/time.h> was always included, even if mythread only used clock_gettime. <time.h> is still needed even if clock_gettime is not used though because struct timespec is needed for mythread_condtime. src/common/mythread.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) commit 9e3cb514b5b95bd235dcdff3db4436f57444ee4f Author: Jia Tan Date: 2022-12-29 01:10:53 +0800 Build: No longer require HAVE_DECL_CLOCK_MONOTONIC to always be set. Previously, if threading was enabled HAVE_DECL_CLOCK_MONOTONIC would always be set to 0 or 1. However, this macro was needed in xz so if xz was not built with threading and HAVE_DECL_CLOCK_MONOTONIC was not defined but HAVE_CLOCK_GETTIME was, it caused a warning during build. Now, HAVE_DECL_CLOCK_MONOTONIC has been renamed to HAVE_CLOCK_MONOTONIC and will only be set if it is 1. CMakeLists.txt | 8 +++----- configure.ac | 5 ++++- src/common/mythread.h | 4 ++-- src/xz/mytime.c | 5 ++--- 4 files changed, 11 insertions(+), 11 deletions(-) commit 6fc3e5467911572fa9af4021ea46396261aae796 Author: Jia Tan Date: 2022-12-28 01:14:07 +0800 Translations: Add Ukrainian translations of man pages. Thanks to Yuri Chornoivan po4a/po4a.conf | 2 +- po4a/uk.po | 3676 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 3677 insertions(+), 1 deletion(-) commit e84f2ab7f8bc38cd8f8befa0bb398656c3c11f8e Author: Jia Tan Date: 2022-12-22 23:14:53 +0800 liblzma: Update documentation for lzma_filter_encoder.
src/liblzma/common/filter_encoder.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) commit b14b8dbba9a3b232787ae218b46430b9246383dd Author: Jia Tan Date: 2022-12-21 21:12:03 +0800 Tests: Adds lzip decoder tests .gitignore | 1 + tests/Makefile.am | 2 + tests/test_lzip_decoder.c | 471 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 474 insertions(+) commit 09a114805e1d4f9a02a06cee7dbf2f5014d1f710 Author: Jia Cheong Tan Date: 2022-12-20 22:05:21 +0800 Doxygen: Update .gitignore for generating docs for in source build. In source builds are not recommended, but we should still ignore the generated artifacts. .gitignore | 2 ++ 1 file changed, 2 insertions(+) commit d3e6fe44196bf9478ad193522e2b48febf2eca6b Author: Jia Tan Date: 2022-12-20 20:46:44 +0800 liblzma: Fix lzma_microlzma_encoder() return value. Using return_if_error on lzma_lzma_lclppb_encode was improper because return_if_error is expecting an lzma_ret value, but lzma_lzma_lclppb_encode returns a boolean. This could result in lzma_microlzma_encoder() returning that boolean as if it were an lzma_ret value, which would be misleading for applications. src/liblzma/common/microlzma_encoder.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit b55a27b46f52524a4a8d9cdef03e6689cefc1375 Author: Lasse Collin Date: 2022-12-16 18:30:02 +0200 liblzma: Update authors list in arm64.c. src/liblzma/simple/arm64.c | 1 + 1 file changed, 1 insertion(+) commit 2fd28d2b7cec3468324a6f15eff7e73c285b1d7d Author: Jia Tan Date: 2022-12-16 20:58:55 +0800 CMake: Update .gitignore for CMake artifacts from in source build. In source builds are not recommended, but we can make it easier by ignoring the generated artifacts from CMake. .gitignore | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) commit b69da6d4bb6bb11fc0cf066920791990d2b22a06 Author: Lasse Collin Date: 2022-12-13 20:37:17 +0200 Bump version to 5.4.0 and soname to 5.4.0.
src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 6 +++--- src/liblzma/liblzma_generic.map | 2 +- src/liblzma/liblzma_linux.map | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) commit 20869eb3fb280ff4f271ef527b12b6bf68b05e19 Author: Lasse Collin Date: 2022-12-13 20:29:39 +0200 Update INSTALL: CMake on Windows isn't experimental anymore. Using CMake to build liblzma should work on a few other OSes but building the command line tools is still subtly broken. It is known that shared library versioning may differ between CMake and Libtool builds on some OSes, most notably Darwin. INSTALL | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) commit cbbd84451944e3e8c63acfaa3c923f6d8aff7852 Author: Lasse Collin Date: 2022-12-13 19:47:53 +0200 Add NEWS for 5.4.0. NEWS | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) commit c3e94d37e8d10a3e96019864b6f5d7b578db2c14 Author: Lasse Collin Date: 2022-12-13 17:41:20 +0200 Fix a typo in NEWS. NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 0d2a2e0a545c3da2b3e9500f1e531eb903087245 Author: Lasse Collin Date: 2022-12-13 17:41:03 +0200 Add NEWS for 5.2.10. NEWS | 12 ++++++++++++ 1 file changed, 12 insertions(+) commit 177ece1c8eb007188fb1b04eff09ca2193fbdea6 Author: Lasse Collin Date: 2022-12-13 12:30:45 +0200 Tests: Fix a typo in tests/files/README. tests/files/README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 60f45bdbaa6b07558b3f4baac285739b0c6342f5 Author: Lasse Collin Date: 2022-12-13 12:30:09 +0200 Tests: Add two ARM64 test files. tests/files/README | 7 +++++++ tests/files/good-1-arm64-lzma2-1.xz | Bin 0 -> 512 bytes tests/files/good-1-arm64-lzma2-2.xz | Bin 0 -> 488 bytes tests/test_files.sh | 5 +++++ 4 files changed, 12 insertions(+) commit f5e419550619c548c7c35d7e367cf00580a56521 Author: Lasse Collin Date: 2022-12-12 22:44:21 +0200 Translations: Update the Catalan translation. 
po/ca.po | 657 +++++++++++++++++++++++++++++---------------------------------- 1 file changed, 306 insertions(+), 351 deletions(-) commit 0fb9d355da3789b1757040af475b4e6bbc8b8af8 Author: Lasse Collin Date: 2022-12-12 19:18:12 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit e5b6c161c61a37c54dcb76a99bbb83ac4abe02dc Author: Lasse Collin Date: 2022-12-12 19:07:58 +0200 Update AUTHORS. AUTHORS | 12 ++++++++++++ 1 file changed, 12 insertions(+) commit f2d98e691099d82054d5f3071ef6f5e809932e44 Author: Lasse Collin Date: 2022-12-12 15:31:14 +0200 Docs: Omit multi-threaded decompress from TODO. The TODO file is still outdated. TODO | 2 -- 1 file changed, 2 deletions(-) commit b42908c42a4cc091db45a7e5ba0e0ecceaa3f6da Author: Lasse Collin Date: 2022-12-11 21:16:09 +0200 Docs: Update xz-file-format.txt to 1.1.0 for ARM64 filter. doc/xz-file-format.txt | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) commit 854f2f5946b353cb0963fd6dfd54d363adc89b9f Author: Lasse Collin Date: 2022-12-11 21:13:57 +0200 xz: Rename --experimental-arm64 to --arm64. src/xz/args.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 31dbd1e5fb65831915a7bbb531c3f19aea8d57a5 Author: Lasse Collin Date: 2022-12-11 21:13:06 +0200 liblzma: Change LZMA_FILTER_ARM64 to the official Filter ID 0x0A. src/liblzma/api/lzma/bcj.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) commit 01b3549e523edac899ec4925b282ceddd20da116 Author: Lasse Collin Date: 2022-12-08 19:24:22 +0200 xz: Make args_info.files_name a const pointer. src/xz/args.c | 2 +- src/xz/args.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit bc665b84ea6bf7946394a08122177efe41b26a5f Author: Lasse Collin Date: 2022-12-08 19:18:16 +0200 xz: Don't modify argv[]. The code that parses --memlimit options and --block-list modified the argv[] when parsing the option string from optarg. This was visible in "ps auxf" and such and could be confusing.
I didn't understand it back in the day when I wrote that code. Now a copy is allocated when modifiable strings are needed. src/xz/args.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) commit a13064e1c290de7933db72b6dffbd65cfce59c9f Author: Lasse Collin Date: 2022-12-08 18:18:57 +0200 Translations: Update the German man page translations. po4a/de.po | 4570 ++++++++++++++++++------------------------------------------ 1 file changed, 1374 insertions(+), 3196 deletions(-) commit 8bdbe42a8d0d75dff70206b923fc4bce5c69a40a Author: Jia Tan Date: 2022-12-06 23:05:56 +0800 Translations: Update the German translation. po/de.po | 586 ++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 315 insertions(+), 271 deletions(-) commit 5c304b57c24ef40ff57f864301065f0244c05bde Author: Jia Tan Date: 2022-12-06 23:04:25 +0800 Translations: Update the Turkish translation. po/tr.po | 221 +++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 108 insertions(+), 113 deletions(-) commit 6d86781fdb937970486500447ebb49b98244235b Author: Jia Tan Date: 2022-12-06 23:02:11 +0800 Translations: Update the Croatian translation. po/hr.po | 228 +++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 113 insertions(+), 115 deletions(-) commit 7a5b4b8075eb36026b1796f04ffed5830c42396a Author: Jia Tan Date: 2022-12-06 22:56:30 +0800 Translations: Add Romanian translation of man pages. Thanks to Remus-Gabriel Chelu. po4a/po4a.conf | 2 +- po4a/ro.po | 3692 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 3693 insertions(+), 1 deletion(-) commit c6977e7400088177556e8771bcb839eb7d90caa3 Author: Jia Tan Date: 2022-12-06 22:52:13 +0800 Translations: Update the Romanian translation. 
po/ro.po | 294 +++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 147 insertions(+), 147 deletions(-) commit ac2a747e939c2cbccff7a49c399769af5e02d2ab Author: Lasse Collin Date: 2022-12-08 17:30:09 +0200 liblzma: Check for unexpected NULL pointers in block_header_decode(). The API docs gave an impression that such checks are done but they actually weren't done. In practice it made little difference since the calling code has a bug if these are NULL. Thanks to Jia Tan for the original patch that checked for block->filters == NULL. src/liblzma/common/block_header_decoder.c | 4 ++++ 1 file changed, 4 insertions(+) commit 24790f49ae66938c1c7574315e1c0aba1ed5ed25 Author: Lasse Collin Date: 2022-12-01 20:59:32 +0200 Bump version number for 5.3.5beta. This also sorts the symbol names alphabetically in liblzma_*.map. src/liblzma/api/lzma/version.h | 4 ++-- src/liblzma/liblzma_generic.map | 10 +++++----- src/liblzma/liblzma_linux.map | 10 +++++----- 3 files changed, 12 insertions(+), 12 deletions(-) commit 7e53c5bcb3c2c17f47c096c06ff6b1481e6ecafa Author: Lasse Collin Date: 2022-12-01 20:57:26 +0200 Add NEWS for 5.3.5beta. NEWS | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) commit 5865f2aaac326fcbd9f8a7d62defa230e4cb644e Author: Lasse Collin Date: 2022-12-01 20:57:09 +0200 Update THANKS. THANKS | 3 +++ 1 file changed, 3 insertions(+) commit 62b270988ec67314d69976df484d2974c6eacfda Author: Lasse Collin Date: 2022-12-01 20:04:17 +0200 liblzma: Use __has_attribute(__symver__) to fix Clang detection. If someone sets up Clang to define __GNUC__ to 10 or greater then symvers broke. __has_attribute is supported by such GCC and Clang versions that don't support __symver__ so this should be much better and simpler way to detect if __symver__ is actually supported. Thanks to Tomasz Gajc for the bug report. 
src/liblzma/common/common.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) commit f9ca7d45162664ddd9fb70e19335c2426e5d75bb Author: Lasse Collin Date: 2022-12-01 18:51:52 +0200 liblzma: Omit zero-skipping from ARM64 filter. It has some complicated downsides and its usefulness is more limited than I originally thought. So this change is bad for certain very specific situations but a generic solution that works for other filters (and is otherwise better too) is planned anyway. And this way 7-Zip can use the same compatible filter for the .7z format. This is still marked as experimental with a new temporary Filter ID. src/liblzma/api/lzma/bcj.h | 2 +- src/liblzma/simple/arm64.c | 81 +++++++++++++--------------------------------- 2 files changed, 24 insertions(+), 59 deletions(-) commit 5baec3f0a9c85e6abf45c0f652f699b074129a8b Author: Lasse Collin Date: 2022-12-01 18:13:27 +0200 xz: Omit the special notes about ARM64 filter on the man page. src/xz/xz.1 | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) commit 0c3627b51862eb0dcdd4fc283d046250571991c6 Author: Lasse Collin Date: 2022-12-01 18:12:03 +0200 liblzma: Don't be over-specific in lzma_str_to_filters API doc. src/liblzma/api/lzma/filter.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) commit 94adf057f27b1970f493dc99cd166407d7255639 Author: Lasse Collin Date: 2022-12-01 17:54:23 +0200 liblzma: Silence unused variable warning when BCJ filters are disabled. Thanks to Jia Tan for the original patch. src/liblzma/common/string_conversion.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) commit c68af4441744e5ffc41a472e1be9c9d53a1d9780 Author: Lasse Collin Date: 2022-12-01 17:38:03 +0200 Translations: Update the Chinese (simplified) translation. 
po/zh_CN.po | 608 ++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 348 insertions(+), 260 deletions(-) commit 3be6942e5c27d29995d41da52fbe274e4ce4a537 Author: Lasse Collin Date: 2022-11-30 18:55:03 +0200 Add NEWS for 5.2.9. NEWS | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) commit 7c16e312cb2f40b81154c0e5be13a3c6b8da485d Author: Jia Tan Date: 2022-11-30 23:33:08 +0800 xz: Remove message_filters_to_str function prototype from message.h. This was forgotten from 7484744af6cbabe81e92af7d9e061dfd597fff7b. src/xz/message.h | 16 ---------------- 1 file changed, 16 deletions(-) commit 764955e2d4f2a5e8d6d6fec63af694f799e050e7 Author: Lasse Collin Date: 2022-11-30 18:08:34 +0200 Change the bug report address. It forwards to me and Jia Tan. Also update the IRC reference in README as #tukaani was moved to Libera Chat long ago. CMakeLists.txt | 2 +- README | 11 +++++------ configure.ac | 2 +- dos/config.h | 2 +- windows/README-Windows.txt | 2 +- 5 files changed, 9 insertions(+), 10 deletions(-) commit c21983c76031e01da01ad3c6cc716fe4b8a75070 Author: Lasse Collin Date: 2022-11-30 17:50:17 +0200 Build: Add string_conversion.c to CMake, DOS, and VS files. CMakeLists.txt | 1 + dos/Makefile | 1 + windows/vs2013/liblzma.vcxproj | 1 + windows/vs2013/liblzma_dll.vcxproj | 1 + windows/vs2017/liblzma.vcxproj | 1 + windows/vs2017/liblzma_dll.vcxproj | 1 + windows/vs2019/liblzma.vcxproj | 1 + windows/vs2019/liblzma_dll.vcxproj | 1 + 8 files changed, 8 insertions(+) commit 30be0c35d24eb5175459d69dbf7d92e2b087ef82 Author: Lasse Collin Date: 2022-11-30 17:38:32 +0200 Update to HTTPS URLs in AUTHORS. AUTHORS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit 0a72b9ca2fe20082da9b7128fe0d908af947a851 Author: Jia Tan Date: 2022-11-30 00:52:06 +0800 liblzma: Improve documentation for string to filter functions. 
src/liblzma/api/lzma/filter.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) commit a6e21fcede3b196160a52dd294d965c508a4bb33 Author: Lasse Collin Date: 2022-11-29 22:27:42 +0200 liblzma: Two fixes to lzma_str_list_filters() API docs. Thanks to Jia Tan. src/liblzma/api/lzma/filter.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit 7484744af6cbabe81e92af7d9e061dfd597fff7b Author: Lasse Collin Date: 2022-11-28 21:57:47 +0200 xz: Use lzma_str_from_filters(). Two uses: Displaying encoder filter chain when compressing with -vv, and displaying the decoder filter chain in --list -vv. src/xz/list.c | 28 ++++++--- src/xz/message.c | 175 +++---------------------------------------------------- 2 files changed, 28 insertions(+), 175 deletions(-) commit cedeeca2ea6ada5b0411b2ae10d7a859e837f203 Author: Lasse Collin Date: 2022-11-28 21:37:48 +0200 liblzma: Add lzma_str_to_filters, _from_filters, and _list_filters. lzma_str_to_filters() uses static error messages which makes them not very precise. It tells the position in the string where an error occurred though which helps quite a bit if applications take advantage of it. Dynamic error messages can be added later with a new flag if it seems important enough. src/liblzma/api/lzma/filter.h | 258 +++++++ src/liblzma/common/Makefile.inc | 1 + src/liblzma/common/string_conversion.c | 1302 ++++++++++++++++++++++++++++++++ src/liblzma/liblzma_generic.map | 3 + src/liblzma/liblzma_linux.map | 3 + 5 files changed, 1567 insertions(+) commit 072ebf7b1335421193ffa9d4a70d5533786b8995 Author: Lasse Collin Date: 2022-11-28 21:02:19 +0200 liblzma: Make lzma_validate_chain() available outside filter_common.c. 
src/liblzma/common/filter_common.c | 8 ++++---- src/liblzma/common/filter_common.h | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) commit 5f22bd2d37e3bd01a5d701b51750eb51f09c11bf Author: Lasse Collin Date: 2022-11-28 10:51:03 +0200 liblzma: Remove lzma_lz_decoder_uncompressed() as it's now unused. src/liblzma/lz/lz_decoder.c | 14 -------------- src/liblzma/lz/lz_decoder.h | 3 --- 2 files changed, 17 deletions(-) commit cee83206465b95729ab649aa2f57fdbde8dcaf89 Author: Lasse Collin Date: 2022-11-28 10:48:53 +0200 liblzma: Use LZMA1EXT feature in lzma_microlzma_decoder(). Here too this avoids the slightly ugly method to set the uncompressed size. Also moved the setting of dict_size to the struct initializer. src/liblzma/common/microlzma_decoder.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) commit e310e8b6a490dfb468f4ed68feff246d776b323c Author: Lasse Collin Date: 2022-11-28 10:28:20 +0200 liblzma: Use LZMA1EXT feature in lzma_alone_decoder(). This avoids the need to use the slightly ugly method to set the uncompressed size. src/liblzma/common/alone_decoder.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) commit 33b8a24b6646a9dbfd8358405aec466b13078559 Author: Lasse Collin Date: 2022-11-27 23:16:21 +0200 liblzma: Add LZMA_FILTER_LZMA1EXT to support LZMA1 without end marker. Some file formats need support for LZMA1 streams that don't use the end of payload marker (EOPM) alias end of stream (EOS) marker. So far liblzma API has supported decompressing such streams via lzma_alone_decoder() when .lzma header specifies a known uncompressed size. Encoding support hasn't been available in the API. Instead of adding a new LZMA1-only API for this purpose, this commit adds a new filter ID for use with raw encoder and decoder. 
The main benefit of this approach is that then also filter chains are possible, for example, if someone wants to implement support for .7z files that use the x86 BCJ filter with LZMA1 (not BCJ2 as that isn't supported in liblzma). src/liblzma/api/lzma/lzma12.h | 123 ++++++++++++++++++++++++++++++-- src/liblzma/common/filter_common.c | 7 ++ src/liblzma/common/filter_decoder.c | 6 ++ src/liblzma/common/filter_encoder.c | 9 +++ src/liblzma/lzma/lzma2_encoder.c | 2 +- src/liblzma/lzma/lzma_decoder.c | 26 ++++++- src/liblzma/lzma/lzma_encoder.c | 40 +++++++++-- src/liblzma/lzma/lzma_encoder.h | 3 +- src/liblzma/lzma/lzma_encoder_private.h | 3 + 9 files changed, 204 insertions(+), 15 deletions(-) commit 9a304bf1e45b3ddf61aaeaa7c764915b34618ede Author: Lasse Collin Date: 2022-11-27 18:43:07 +0200 liblzma: Avoid unneeded use of void pointer in LZMA decoder. src/liblzma/lzma/lzma_decoder.c | 3 +-- src/liblzma/lzma/lzma_decoder.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) commit 218394958c7683f892275bb40eae880620feebcc Author: Lasse Collin Date: 2022-11-27 18:20:33 +0200 liblzma: Pass the Filter ID to LZ encoder and decoder. This allows using two Filter IDs with the same initialization function and data structures. src/liblzma/common/alone_decoder.c | 1 + src/liblzma/common/alone_encoder.c | 1 + src/liblzma/common/common.h | 7 +++++-- src/liblzma/common/lzip_decoder.c | 1 + src/liblzma/common/microlzma_decoder.c | 1 + src/liblzma/common/microlzma_encoder.c | 1 + src/liblzma/lz/lz_decoder.c | 5 +++-- src/liblzma/lz/lz_decoder.h | 3 ++- src/liblzma/lz/lz_encoder.c | 5 +++-- src/liblzma/lz/lz_encoder.h | 3 ++- src/liblzma/lzma/lzma2_decoder.c | 3 ++- src/liblzma/lzma/lzma2_encoder.c | 3 ++- src/liblzma/lzma/lzma_decoder.c | 2 +- src/liblzma/lzma/lzma_encoder.c | 2 +- 14 files changed, 26 insertions(+), 12 deletions(-) commit 1663c7676b76f4c514031797f3db1896e8100f7f Author: Lasse Collin Date: 2022-11-27 01:03:16 +0200 liblzma: Remove two FIXME comments. 
src/liblzma/common/filter_encoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 11fe708db783ac36ebeeb85da164e29e8c300910 Author: Lasse Collin Date: 2022-11-26 22:25:30 +0200 xz: Use lzma_filters_free(). src/xz/list.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) commit e782af9110d8499c7ac2929bc871540eefea5ea1 Author: Lasse Collin Date: 2022-11-26 22:21:13 +0200 liblzma: Use lzma_filters_free() in more places. src/liblzma/common/block_header_decoder.c | 20 ++------------------ src/liblzma/common/stream_decoder.c | 4 +--- src/liblzma/common/stream_decoder_mt.c | 22 +++++----------------- 3 files changed, 8 insertions(+), 38 deletions(-) commit 90caaded2dc6db1d6a55b01160d7e87f4a423628 Author: Lasse Collin Date: 2022-11-25 18:04:37 +0200 liblzma: Omit simple coder init functions if they are disabled. src/liblzma/simple/arm.c | 4 ++++ src/liblzma/simple/armthumb.c | 4 ++++ src/liblzma/simple/ia64.c | 4 ++++ src/liblzma/simple/powerpc.c | 4 ++++ src/liblzma/simple/sparc.c | 4 ++++ src/liblzma/simple/x86.c | 4 ++++ 6 files changed, 24 insertions(+) commit 5cd9f0df78cc4f8a7807bf6104adea13034fbb45 Author: Lasse Collin Date: 2022-11-24 23:24:59 +0200 xz: Allow nice_len 2 and 3 even if match finder requires 3 or 4. Now that liblzma accepts these, we avoid the extra check and there's one message less for translators too. src/xz/options.c | 5 ----- 1 file changed, 5 deletions(-) commit 3be88ae071371caa279b44e13f4836fb178fe4ae Author: Lasse Collin Date: 2022-11-24 23:23:55 +0200 liblzma: Allow nice_len 2 and 3 even if match finder requires 3 or 4. That is, if the specified nice_len is smaller than the minimum of the match finder, silently use the match finder's minimum value instead of reporting an error. The old behavior is annoying to users and it complicates xz options handling too. 
src/liblzma/lz/lz_encoder.c | 14 +++++++++----- src/liblzma/lz/lz_encoder.h | 9 +++++++++ src/liblzma/lzma/lzma_encoder.c | 11 ++++++++--- 3 files changed, 26 insertions(+), 8 deletions(-) commit 93439cfafe1768b3b18d67d2356ef7e7559bba59 Author: Lasse Collin Date: 2022-11-24 16:25:10 +0200 liblzma: Add lzma_filters_update() support to the multi-threaded encoder. A tiny downside of this is that now 1-4 tiny allocations are made for every Block because each worker thread needs its own copy of the filter chain. src/liblzma/api/lzma/filter.h | 36 +++++++------ src/liblzma/common/stream_encoder_mt.c | 96 +++++++++++++++++++++++++++++++--- 2 files changed, 109 insertions(+), 23 deletions(-) commit 17ac51e689794eb41cab3e80946fec689caea2d2 Author: Lasse Collin Date: 2022-11-24 14:53:22 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 06824396b2b6c84f3a235cb7c19c2a9701167797 Author: Lasse Collin Date: 2022-11-24 14:52:44 +0200 Build: Don't put GNU/Linux-specific symbol versions into static liblzma. It not only makes no sense to put symbol versions into a static library but it can also cause breakage. By default Libtool #defines PIC if building a shared library and doesn't define it for static libraries. This is documented in the Libtool manual. It can be overridden using --with-pic or --without-pic. configure.ac detects if --with-pic or --without-pic is used and then gives an error if neither --disable-shared nor --disable-static was used at the same time. Thus, in normal situations it works to build both shared and static library at the same time on GNU/Linux, only --with-pic or --without-pic requires that only one type of library is built.
Thanks to John Paul Adrian Glaubitz from Debian for reporting the problem that occurred on ia64: https://www.mail-archive.com/xz-devel@tukaani.org/msg00610.html CMakeLists.txt | 5 +- configure.ac | 143 +++++++++++++++++++++++++++++--------------- src/liblzma/common/common.h | 12 ++++ 3 files changed, 111 insertions(+), 49 deletions(-) commit e1acf7107291f8b3d6d609a7133331ff36d35d14 Author: Lasse Collin Date: 2022-11-24 01:32:16 +0200 liblzma: Refactor to use lzma_filters_free(). lzma_filters_free() sets the options to NULL and ids to LZMA_VLI_UNKNOWN so there is no need to do it by caller; the filter arrays will always be left in a safe state. Also use memcpy() instead of a loop to copy a filter chain when it is known to be safe to copy LZMA_FILTERS_MAX + 1 (even if the elements past the terminator might be uninitialized). src/liblzma/common/stream_encoder.c | 16 ++++------------ src/liblzma/common/stream_encoder_mt.c | 11 ++--------- 2 files changed, 6 insertions(+), 21 deletions(-) commit cb05dbcf8b868441ec805016222f3fd77f1c5caa Author: Lasse Collin Date: 2022-11-24 01:26:37 +0200 liblzma: Fix another invalid free() after memory allocation failure. This time it can happen when lzma_stream_encoder_mt() is used to reinitialize an existing multi-threaded Stream encoder and one of 1-4 tiny allocations in lzma_filters_copy() fail. It's very similar to the previous bug 10430fbf3820dafd4eafd38ec8be161a6978ed2b, happening with an array of lzma_filter structures whose old options are freed but the replacement never arrives due to a memory allocation failure in lzma_filters_copy(). src/liblzma/common/stream_encoder_mt.c | 4 ++++ 1 file changed, 4 insertions(+) commit 75f1a6c26df4ce329da0882786403e3ccf5cd898 Author: Jia Tan Date: 2022-05-05 20:53:42 +0800 liblzma: Add support for LZMA_SYNC_FLUSH in the Block encoder. The documentation mentions that lzma_block_encoder() supports LZMA_SYNC_FLUSH but it was never added to supported_actions[] in the internal structure. 
Because of this, LZMA_SYNC_FLUSH could not be used with the Block encoder unless it was the next coder after something like stream_encoder() or stream_encoder_mt(). src/liblzma/common/block_encoder.c | 1 + 1 file changed, 1 insertion(+) commit d0901645170b638c517f5c50866b6ef48f491c65 Author: Lasse Collin Date: 2022-11-24 01:02:50 +0200 liblzma: Add new API function lzma_filters_free(). This is small but convenient and should have been added a long time ago. src/liblzma/api/lzma/filter.h | 21 +++++++++++++++++++++ src/liblzma/common/filter_common.c | 26 ++++++++++++++++++++++++++ src/liblzma/liblzma_generic.map | 1 + src/liblzma/liblzma_linux.map | 1 + 4 files changed, 49 insertions(+) commit ae1f8a723dcde2f2c5cf444bcbb5fc5026b3c3c5 Author: Lasse Collin Date: 2022-11-24 00:02:31 +0200 CMake: Don't use symbol versioning with static library. CMakeLists.txt | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) commit 48c1b99dc537a27e1ca929d8837e778e5ba32191 Author: Lasse Collin Date: 2022-11-23 21:55:22 +0200 liblzma: Add lzma_attr_warn_unused_result to lzma_filters_copy(). src/liblzma/api/lzma/filter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 10430fbf3820dafd4eafd38ec8be161a6978ed2b Author: Lasse Collin Date: 2022-11-23 21:26:21 +0200 liblzma: Fix invalid free() after memory allocation failure. The bug was in the single-threaded .xz Stream encoder in the code that is used for both re-initialization and for lzma_filters_update(). To trigger it, an application had to either re-initialize an existing encoder instance with lzma_stream_encoder() or use lzma_filters_update(), and then one of the 1-4 tiny allocations in lzma_filters_copy() (called from stream_encoder_update()) must fail. An error was correctly reported but the encoder state was corrupted. This is related to the recent fix in f8ee61e74eb40600445fdb601c374d582e1e9c8a which is good but it wasn't enough to fix the main problem in stream_encoder.c. 
src/liblzma/common/stream_encoder.c | 39 +++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) commit cafd6dc397ca8b5b5f7775e8d6876b8fe70f8e70 Author: Lasse Collin Date: 2022-11-22 16:37:15 +0200 liblzma: Fix language in a comment. src/liblzma/common/stream_encoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit c392bf8ccba857baaf50399c4b460119befacd54 Author: Lasse Collin Date: 2022-11-22 11:20:17 +0200 liblzma: Fix infinite loop in LZMA encoder init with dict_size >= 2 GiB. The encoder doesn't support dictionary sizes larger than 1536 MiB. This is validated, for example, when calculating the memory usage via lzma_raw_encoder_memusage(). It is also enforced by the LZ part of the encoder initialization. However, LZMA encoder with LZMA_MODE_NORMAL did an unsafe calculation with dict_size before such validation and that results in an infinite loop if dict_size was 2 << 30 or greater. src/liblzma/lzma/lzma_encoder.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) commit f50534c973a591ccf65485adfc827a8a7126ca6c Author: Lasse Collin Date: 2022-11-21 13:02:33 +0200 liblzma: Fix two Doxygen commands in the API headers. These were caught by clang -Wdocumentation. src/liblzma/api/lzma/hardware.h | 2 +- src/liblzma/api/lzma/index_hash.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 649d4872ed2f55196114a061d45b416fc4353569 Author: Lasse Collin Date: 2022-11-19 19:09:55 +0200 xz: Refactor duplicate code from hardware_memlimit_mtenc_get(). src/xz/hardware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit d327743bb547a53364e5951a16e5f1663fe4b9ff Author: Lasse Collin Date: 2022-11-19 19:06:13 +0200 xz: Add support --threads=+N so that -T+1 gives threaded mode. 
src/xz/args.c | 18 +++++++++++++++--- src/xz/hardware.c | 17 +++++++++++++++-- src/xz/hardware.h | 1 + src/xz/xz.1 | 21 ++++++++++++++++++++- 4 files changed, 51 insertions(+), 6 deletions(-) commit a11a2b8b5e830ba682c1d81aaa7078842b296995 Author: Jia Tan Date: 2022-11-19 23:18:04 +0800 CMake: Adds test_memlimit to CMake tests CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) commit 2af8d9e9b3f44f62d19e7c39297ec63af2e8c64f Author: Lasse Collin Date: 2022-11-15 19:10:21 +0200 Translations: Update the Korean translation. po/ko.po | 652 ++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 371 insertions(+), 281 deletions(-) commit 16ac05677292f7e21a4feaddcfb2ab062ea5f385 Author: Lasse Collin Date: 2022-11-15 19:09:28 +0200 Translations: Update the Turkish translation. po/tr.po | 568 ++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 310 insertions(+), 258 deletions(-) commit b9a67d9a5fa207062d4aa8a01639234609315d31 Author: Lasse Collin Date: 2022-11-15 10:58:39 +0200 Bump version number for 5.3.4alpha. src/liblzma/api/lzma/version.h | 2 +- src/liblzma/liblzma_generic.map | 2 +- src/liblzma/liblzma_linux.map | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) commit 5b999ba289b3280457b7386b9ac65dbbdf1575a5 Author: Lasse Collin Date: 2022-11-15 10:54:40 +0200 Add NEWS for 5.3.4alpha. NEWS | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) commit ce8db9e37da4f6c87691c5066f51f91f2411c44a Author: Lasse Collin Date: 2022-11-15 10:54:08 +0200 Add NEWS for 5.2.8. NEWS | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) commit b56bc8251d2736224af6bdaaae734ceb8926a879 Author: Lasse Collin Date: 2022-11-14 23:19:57 +0200 Revert "liblzma: Simple/BCJ filters: Allow disabling generic BCJ options." This reverts commit 177bdc922cb17bd0fd831ab8139dfae912a5c2b8 and also does equivalent change to arm64.c. 
Now that ARM64 filter will use lzma_options_bcj, this change is not needed anymore. src/liblzma/simple/arm.c | 2 +- src/liblzma/simple/arm64.c | 2 +- src/liblzma/simple/armthumb.c | 2 +- src/liblzma/simple/ia64.c | 2 +- src/liblzma/simple/powerpc.c | 2 +- src/liblzma/simple/simple_coder.c | 4 ++-- src/liblzma/simple/simple_private.h | 2 +- src/liblzma/simple/sparc.c | 2 +- src/liblzma/simple/x86.c | 3 +-- 9 files changed, 10 insertions(+), 11 deletions(-) commit 8370ec8edf9ddf8d1d9fef03d8d1027503ec4c35 Author: Lasse Collin Date: 2022-11-14 23:14:41 +0200 Replace the experimental ARM64 filter with a new experimental version. This is incompatible with the previous version. This has space/tab fixes in filter_*.c and bcj.h too. src/liblzma/api/lzma/bcj.h | 41 +----- src/liblzma/common/filter_common.c | 14 +- src/liblzma/common/filter_decoder.c | 12 +- src/liblzma/common/filter_encoder.c | 17 +-- src/liblzma/simple/arm64.c | 283 ++++++++++++++---------------------- src/liblzma/simple/simple_decoder.h | 4 - src/liblzma/simple/simple_encoder.h | 2 - src/xz/args.c | 2 +- src/xz/message.c | 13 +- src/xz/options.c | 39 ----- src/xz/options.h | 7 - 11 files changed, 147 insertions(+), 287 deletions(-) commit f644473a211394447824ea00518d0a214ff3f7f2 Author: Lasse Collin Date: 2022-11-14 21:34:57 +0200 liblzma: Add fast CRC64 for 32/64-bit x86 using SSSE3 + SSE4.1 + CLMUL. It also works on E2K as it supports these intrinsics. On x86-64 runtime detection is used so the code keeps working on older processors too. A CLMUL-only build can be done by using -msse4.1 -mpclmul in CFLAGS and this will reduce the library size since the generic implementation and its 8 KiB lookup table will be omitted. On 32-bit x86 this isn't used by default for now because by default on 32-bit x86 the separate assembly file crc64_x86.S is used. If --disable-assembler is used then this new CLMUL code is used the same way as on 64-bit x86. 
However, a CLMUL-only build (-msse4.1 -mpclmul) won't omit the 8 KiB lookup table on 32-bit x86 due to a currently-missing check for disabled assembler usage. The configure.ac check should be such that the code won't be built if something in the toolchain doesn't support it but --disable-clmul-crc option can be used to unconditionally disable this feature. CLMUL speeds up decompression of files that have compressed very well (assuming CRC64 is used as a check type). It is known that the CLMUL code is significantly slower than the generic code for tiny inputs (especially 1-8 bytes but up to 16 bytes). If that is a real-world problem then there is already a commented-out variant that uses the generic version for small inputs. Thanks to Ilya Kurdyukov for the original patch which was derived from a white paper from Intel [1] (published in 2009) and public domain code from [2] (released in 2016). [1] https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf [2] https://github.com/rawrunprotected/crc CMakeLists.txt | 26 ++- INSTALL | 12 ++ configure.ac | 59 +++++- src/liblzma/check/crc64_fast.c | 449 +++++++++++++++++++++++++++++++++++++++- src/liblzma/check/crc64_table.c | 21 +- 5 files changed, 554 insertions(+), 13 deletions(-) commit 3b466bc79672bb2b06d1245a500588e6026e0ba0 Author: Lasse Collin Date: 2022-11-14 20:14:34 +0200 Translations: Update the Swedish translation one more time. po/sv.po | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit e963379a8622ebdff6ce78e76b803bcd1e1d16d6 Author: Lasse Collin Date: 2022-11-14 19:34:15 +0200 Translations: Update the Swedish translation again. po/sv.po | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) commit a4bc689a823a2254f29ac9d233170add5121b307 Author: Lasse Collin Date: 2022-11-14 19:07:45 +0200 Translations: Update the Swedish translation.
po/sv.po | 671 ++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 382 insertions(+), 289 deletions(-) commit bbf2073d824ab4ba33bed4b77f467435abd333a5 Author: Lasse Collin Date: 2022-11-14 18:58:09 +0200 Translations: Update the Ukrainian translation. po/uk.po | 618 ++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 354 insertions(+), 264 deletions(-) commit ac10b1b3622e70881595586edfb8a3ebdcd76bb6 Author: Lasse Collin Date: 2022-11-14 17:58:07 +0200 Build: Omit x86_64 from --enable-assembler. It didn't do anything. There are only 32-bit x86 assembly files and it feels likely that new files won't be added as intrinsics in C are more portable across toolchains and OSes. configure.ac | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) commit eb0f1450ad9f23dac03050d9c8375980240aee21 Author: Lasse Collin Date: 2022-11-14 16:00:52 +0200 liblzma: Use __attribute__((__constructor__)) if available. This uses it for CRC table initializations when using --disable-small. It avoids mythread_once() overhead. It also means that then --disable-small --disable-threads is thread-safe if this attribute is supported. CMakeLists.txt | 15 +++++++++++++++ INSTALL | 4 +++- configure.ac | 31 ++++++++++++++++++++++++++++--- src/liblzma/check/crc32_small.c | 7 +++++++ src/liblzma/check/crc64_small.c | 5 +++++ src/liblzma/lz/lz_encoder.c | 2 +- 6 files changed, 59 insertions(+), 5 deletions(-) commit 6553f49b11dafad35c73b05f12e14865ea1fd8a1 Author: Lasse Collin Date: 2022-11-12 21:19:52 +0200 Translations: Update the Romanian translation. po/ro.po | 651 +++++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 380 insertions(+), 271 deletions(-) commit db97e69e12393becc29f8febd53133d0d36989bd Author: Lasse Collin Date: 2022-11-12 21:17:45 +0200 Translations: Update the Hungarian translation. 
po/hu.po | 625 ++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 357 insertions(+), 268 deletions(-) commit 2bbb9c0f3829a8b121b36998d273a6c6f92000f4 Author: Lasse Collin Date: 2022-11-11 17:58:57 +0200 Translations: Update the Finnish translation. po/fi.po | 610 ++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 348 insertions(+), 262 deletions(-) commit 3c8cbb8137b6f8ed9416c1209d73cdbcb015251f Author: Lasse Collin Date: 2022-11-11 17:58:18 +0200 Translations: Update the Croatian translation. po/hr.po | 680 +++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 381 insertions(+), 299 deletions(-) commit 26c3359eac0988d6f3986735cd1363bec1678e8e Author: Lasse Collin Date: 2022-11-11 17:57:18 +0200 Translations: Update the Polish translation. po/pl.po | 569 ++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 309 insertions(+), 260 deletions(-) commit 577e467b137c735afb8de6ae71ac7a73c2960cc4 Author: Lasse Collin Date: 2022-11-11 17:56:44 +0200 Translations: Update the Spanish translation. po/es.po | 598 ++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 344 insertions(+), 254 deletions(-) commit f9b4ff6e9a0f1678650775582d3e4fe782abce97 Author: Lasse Collin Date: 2022-11-11 17:16:03 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit a39961ef211e1bf030b17edeea3cff29fe263b67 Author: Lasse Collin Date: 2022-11-11 17:15:25 +0200 liblzma: Fix building with Intel ICC (the classic compiler). It claims __GNUC__ >= 10 but doesn't support __symver__ attribute. Thanks to Stephen Sachs. src/liblzma/common/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit c715f683dcb1a817d565da292cddfbceda643e12 Author: Lasse Collin Date: 2022-11-11 14:35:58 +0200 liblzma: Fix incorrect #ifdef for x86 SSE2 support. __SSE2__ is the correct macro for SSE2 support with GCC, Clang, and ICC. 
__SSE2_MATH__ means doing floating point math with SSE2 instead of 387. Often the latter macro is defined if the first one is but it was still a bug. src/liblzma/common/memcmplen.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) commit bd334ae56afe7f642ad4d0f1ac19e74e82daa1ce Author: Lasse Collin Date: 2022-11-11 13:27:06 +0200 Add NEWS for 5.2.7 (forgotten cherry-pick from v5.2). NEWS | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) commit 3c7860cf49de6f81046b3a4034a89f3a4803a576 Author: Lasse Collin Date: 2022-11-11 13:16:21 +0200 xzdiff: Add support for .lz files. The other scripts don't need changes for .lz support because in those scripts it is enough that xz supports .lz. src/scripts/xzdiff.in | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) commit d76c752a6d77052e5ad57ade555082585f7ac5d8 Author: Lasse Collin Date: 2022-11-11 12:23:58 +0200 Scripts: Ignore warnings from xz. In practice this means making the scripts work when the input files have an unsupported check type which isn't a problem in practice unless support for some check types has been disabled at build time. src/scripts/xzdiff.in | 5 +++-- src/scripts/xzgrep.in | 2 +- src/scripts/xzless.in | 4 ++-- src/scripts/xzmore.in | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) commit 6552535afd1fe29d726ab6e68cf14ce3624fd48c Author: Lasse Collin Date: 2022-11-10 12:34:43 +0200 Translations: Rename po4a/fr_FR.po to po4a/fr.po. That's how it is preferred at the Translation Project. On my system /usr/share/man/fr_FR doesn't contain any other man pages than XZ Utils while /usr/share/man/fr has quite a few, so this will fix that too. Thanks to Benno Schulenberg from the Translation Project.
po4a/{fr_FR.po => fr.po} | 0 po4a/po4a.conf | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) commit 0918159ce4c75bfb60aff0193b559f8a9f41d25a Author: Lasse Collin Date: 2022-11-09 18:48:50 +0200 xz: Update the man page about BCJ filters, including upcoming --arm64. The --arm64 isn't actually implemented yet in the form described in this commit. Thanks to Jia Tan. src/xz/xz.1 | 66 +++++++++++++++++++++++++++---------------------------------- 1 file changed, 29 insertions(+), 37 deletions(-) commit ba2ae3596f6be1587495f33b367488f6e00e56f1 Author: Lasse Collin Date: 2022-11-09 18:14:14 +0200 xz: Add --arm64 to --long-help and omit endianness from ARM(-Thumb). Modern 32-bit ARM in big endian mode use little endian for instruction encoding still, so the filters work on such executables too. It's likely less confusing for users this way. The --arm64 option hasn't been implemented yet (there is --experimental-arm64 but it's different). The --arm64 option is added now anyway because this is the likely result and the strings need to be ready for translators. Thanks to Jia Tan. src/xz/message.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) commit 802d57d9215d9c81dbee86edb43c9e93a7f7ec55 Author: Lasse Collin Date: 2022-11-09 15:12:13 +0200 Windows: Update the VS project files for ARM64 and .lz support. windows/vs2013/config.h | 9 +++++++++ windows/vs2013/liblzma.vcxproj | 5 ++++- windows/vs2013/liblzma_dll.vcxproj | 5 ++++- windows/vs2017/config.h | 9 +++++++++ windows/vs2017/liblzma.vcxproj | 3 +++ windows/vs2017/liblzma_dll.vcxproj | 3 +++ windows/vs2019/config.h | 9 +++++++++ windows/vs2019/liblzma.vcxproj | 5 ++++- windows/vs2019/liblzma_dll.vcxproj | 5 ++++- 9 files changed, 49 insertions(+), 4 deletions(-) commit 5846aeda05972bc803c6094821ae836229ebe691 Author: Lasse Collin Date: 2022-11-09 14:57:48 +0200 DOS: Update Makefile and config.h to include ARM64 and .lz support. 
dos/Makefile | 2 ++ dos/config.h | 9 +++++++++ 2 files changed, 11 insertions(+) commit 781da8d6c44de6aa278c916375250668a0b107f2 Author: Lasse Collin Date: 2022-11-09 14:45:05 +0200 CMake: Add lzip decoder files and #define to the build. CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) commit df8ad4af65a9c4846b108550d0083770a69dee64 Author: Lasse Collin Date: 2022-11-09 14:41:56 +0200 Docs: Update INSTALL and also add new prohibited options to PACKAGERS. INSTALL | 49 +++++++++++++++++++++++++++++++++++++++++-------- PACKAGERS | 2 ++ 2 files changed, 43 insertions(+), 8 deletions(-) commit c8ef089c149afaab413c3a51be827dd1d11afe0e Author: Lasse Collin Date: 2022-10-20 17:39:06 +0300 Tests: Test the .lz files in test_files.sh. tests/test_files.sh | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) commit c8f70ebb4628ceb6cb29cc9195d9deadf69d2bd7 Author: Lasse Collin Date: 2022-10-20 15:35:59 +0300 Tests: Add .lz (lzip) test files. tests/files/README | 109 +++++++++++++++++++++++++++++---- tests/files/bad-1-v0-uncomp-size.lz | Bin 0 -> 42 bytes tests/files/bad-1-v1-crc32.lz | Bin 0 -> 50 bytes tests/files/bad-1-v1-dict-1.lz | Bin 0 -> 50 bytes tests/files/bad-1-v1-dict-2.lz | Bin 0 -> 50 bytes tests/files/bad-1-v1-magic-1.lz | Bin 0 -> 50 bytes tests/files/bad-1-v1-magic-2.lz | Bin 0 -> 50 bytes tests/files/bad-1-v1-member-size.lz | Bin 0 -> 50 bytes tests/files/bad-1-v1-trailing-magic.lz | Bin 0 -> 54 bytes tests/files/bad-1-v1-uncomp-size.lz | Bin 0 -> 50 bytes tests/files/good-1-v0-trailing-1.lz | Bin 0 -> 59 bytes tests/files/good-1-v0.lz | Bin 0 -> 42 bytes tests/files/good-1-v1-trailing-1.lz | Bin 0 -> 67 bytes tests/files/good-1-v1-trailing-2.lz | Bin 0 -> 70 bytes tests/files/good-1-v1.lz | Bin 0 -> 50 bytes tests/files/good-2-v0-v1.lz | Bin 0 -> 78 bytes tests/files/good-2-v1-v0.lz | Bin 0 -> 78 bytes tests/files/good-2-v1-v1.lz | Bin 0 -> 86 bytes tests/files/unsupported-1-v234.lz | Bin 0 -> 50 bytes 19 files changed, 98 insertions(+), 11 
deletions(-) commit 731db13e6fa3ad3e3fc786c0ccf6eac4cce6865f Author: Lasse Collin Date: 2022-10-19 22:32:51 +0300 xz: Remove the commented-out FORMAT_GZIP, gzip, .gz, and .tgz. src/xz/args.c | 2 -- src/xz/coder.h | 1 - src/xz/suffix.c | 9 --------- 3 files changed, 12 deletions(-) commit 3176f992c55b8d788c4633809aaf9447376a5a12 Author: Lasse Collin Date: 2022-10-08 21:28:15 +0300 xz: Add .lz (lzip) decompression support. If configured with --disable-lzip-decoder then --long-help will still list `lzip' in --format but I left it like that since due to translations it would be messy to have two help strings. Features are disabled only in special situations so wrong help in such a situation shouldn't matter much. Thanks to Michał Górny for the original patch. src/xz/args.c | 9 ++++++++ src/xz/coder.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- src/xz/coder.h | 3 +++ src/xz/message.c | 2 +- src/xz/suffix.c | 26 ++++++++++++++++++---- src/xz/xz.1 | 46 +++++++++++++++++++++++++++++++++----- 6 files changed, 141 insertions(+), 13 deletions(-) commit 034086e1ae1459210837a24e04878435c86dc41b Author: Lasse Collin Date: 2022-10-08 00:29:20 +0300 liblzma: Add .lz support to lzma_auto_decoder(). Thanks to Michał Górny for the original patch. src/liblzma/api/lzma/container.h | 10 ++++++---- src/liblzma/common/Makefile.inc | 3 ++- src/liblzma/common/auto_decoder.c | 23 +++++++++++++++++------ src/liblzma/common/lzip_decoder.h | 22 ++++++++++++++++++++++ 4 files changed, 47 insertions(+), 11 deletions(-) commit 0538db038f3cdc352007dacb42454aa1806b8e40 Author: Lasse Collin Date: 2022-10-06 15:50:20 +0300 liblzma: Add .lz (lzip) decompression support (format versions 0 and 1). Support for format version 0 was removed from lzip 1.18 for some reason. .lz format version 0 files are rare (and old) but some source packages were released in this format, and some people might have personal files in this format too. 
It's very little extra code to support it alongside format version 1 so this commit adds support for both. The Sync Flush marker extension to the original .lz format version 1 isn't supported. It would require changes to the LZMA decoder itself. Such files are very rare anyway. See the API doc for lzma_lzip_decoder() for more details about the .lz format support. Thanks to Michał Górny for the original patch. configure.ac | 21 ++ src/liblzma/api/lzma/container.h | 62 +++++- src/liblzma/common/Makefile.inc | 5 + src/liblzma/common/lzip_decoder.c | 413 ++++++++++++++++++++++++++++++++++++++ src/liblzma/liblzma_generic.map | 1 + src/liblzma/liblzma_linux.map | 1 + 6 files changed, 501 insertions(+), 2 deletions(-) commit 633d48a075b9ce4b9c08a7a56a7eb4cabc18100c Author: Lasse Collin Date: 2022-11-09 14:17:23 +0200 liblzma: Add the missing Makefile.inc change for --disable-microlzma. This was forgotten from commit 59c4d6e1390f6f4176f43ac1dad1f7ac03c449b8. src/liblzma/common/Makefile.inc | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) commit 724285dadbdc88765c8fb83eab9816575a260966 Author: Lasse Collin Date: 2022-11-09 14:10:52 +0200 xz: Add comments about stdin and src_st.st_size. "xz -v < regular_file > out.xz" doesn't display the percentage and estimated remaining time because it doesn't even try to check the input file size when input is read from stdin. This could be improved but for now there's just a comment to remind about it. src/xz/coder.c | 9 +++++++++ src/xz/file_io.c | 4 ++++ 2 files changed, 13 insertions(+) commit f723eec68b0e44234910f669a29119de33018967 Author: Lasse Collin Date: 2022-11-09 12:48:22 +0200 xz: Fix displaying of file sizes in progress indicator in passthru mode. It worked for one input file since the counters are zero when xz starts but they weren't reset when starting a new file in passthru mode.
For example, if files A, B, and C are one byte each, then "xz -dcvf A B C" would show file sizes as 1, 2, and 3 bytes instead of 1, 1, and 1 byte. src/xz/coder.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit 69265d0f223ddf1d66f799b8b047df22923e376f Author: Lasse Collin Date: 2022-11-09 11:27:20 +0200 xz: Add a comment why --to-stdout is not in --help. It is on the man page still. src/xz/message.c | 3 +++ 1 file changed, 3 insertions(+) commit fe6b8852a3c6a0eb5a3c33512e0a69af257d3bc7 Author: Lasse Collin Date: 2022-11-08 23:05:37 +0200 xz: Make xz -lvv show that the upcoming --arm64 needs 5.4.0 to decompress. src/xz/list.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) commit fb3f05ac9f2b4b0e3643401960fbeab31997ac7a Author: Lasse Collin Date: 2022-11-08 22:26:54 +0200 Docs: Update faq.txt a little. doc/faq.txt | 66 ++++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 23 deletions(-) commit 05331f091ec3b68eccbfb2a9a7a576072768fb4b Author: Lasse Collin Date: 2022-11-08 16:57:17 +0200 Translations: Update Turkish translation. po/tr.po | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit ed3a4822963b4940d84e6f44d47277c394fc046d Author: Lasse Collin Date: 2022-11-08 14:55:32 +0200 Translations: Update Croatian translation. po/hr.po | 190 ++++++++++++++++++++++++++++++++------------------------------- 1 file changed, 96 insertions(+), 94 deletions(-) commit 4746f5ec721316bc4c6fec9905b2902e0360e0af Author: Lasse Collin Date: 2022-11-08 14:13:03 +0200 liblzma: Update API docs about decoder flags. src/liblzma/api/lzma/container.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) commit 8779a9db5d0cec00c9dc9e9965dd2dda04f9d80d Author: Lasse Collin Date: 2022-11-08 14:01:50 +0200 liblzma: Use the return_if_error() macro in alone_decoder.c. 
src/liblzma/common/alone_decoder.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) commit 3f4990b6822961e75cd9b4e2e82b1df63f6f8fcc Author: Lasse Collin Date: 2022-11-08 14:00:58 +0200 liblzma: Fix a comment in auto_decoder.c. src/liblzma/common/auto_decoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 026a5897c72a2041ae08ceec54ce8b1cdeb51334 Author: Lasse Collin Date: 2022-11-08 13:43:19 +0200 xz: Initialize the pledge(2) sandbox at the very beginning of main(). It feels better that the initializations are sandboxed too. They don't do anything that the pledge() call wouldn't allow. src/xz/main.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) commit 49a59f6ca001c3ce9affa2c162b437aad021b4d5 Author: Lasse Collin Date: 2022-11-07 22:51:16 +0200 xz: Extend --robot --info-memory output. Now it includes everything that the human-readable --info-memory shows. src/xz/hardware.c | 24 +++++++++++++++--------- src/xz/xz.1 | 47 +++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 15 deletions(-) commit 5e2450c75cbac966c62cf2231c824f2cc91ddba8 Author: Lasse Collin Date: 2022-11-07 17:22:04 +0200 liblzma: Include cached memory in reported memusage in threaded decoder. This affects lzma_memusage() and lzma_memlimit_set() when used with the threaded decompressor. Now all allocations are reported by lzma_memusage() (so it's not misleading) and lzma_memlimit_set() cannot lower the limit below that value. The alternative would have been to allow lowering the limit if doing so is possible by freeing the cached memory but since the primary use case of lzma_memlimit_set() is to increase memlimit after LZMA_MEMLIMIT_ERROR this simple approach was selected. The cached memory was always included when enforcing the memory usage limit while decoding. Thanks to Jia Tan. 
src/liblzma/common/stream_decoder_mt.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) commit 1fc6e7dd1fabdb60124d449b99273330ccab3ff1 Author: Jia Tan Date: 2022-11-07 16:24:14 +0200 xz: Avoid a compiler warning in progress_speed() in message.c. This should be smaller too since it avoids the string constants. src/xz/message.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) commit cf118c014683069b5dbe91898acdc40f2f0a1f5d Author: Lasse Collin Date: 2022-10-31 16:26:05 +0200 Build: Clarify comment in configure.ac about SSE2. configure.ac | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) commit aad3c609ffb72f581a7a2b67be3ad70b2b327840 Author: Lasse Collin Date: 2022-10-31 16:16:37 +0200 Build: Remove obsolete commented-out lines from configure.ac. configure.ac | 4 ---- 1 file changed, 4 deletions(-) commit e53e0e2186c6b8ce866bd19aec52f1c318ed31ba Author: Lasse Collin Date: 2022-10-31 13:31:58 +0200 Windows: Fix mythread_once() macro with Vista threads. Don't call InitOnceComplete() if initialization was already done. So far mythread_once() has been needed only when building with --enable-small. windows/build.bash does this together with --disable-threads so the Vista-specific mythread_once() is never needed by those builds. VS project files or CMake-builds don't support HAVE_SMALL builds at all. src/common/mythread.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) commit 48dde3bab9dc04081acb5aa7cf7c5044b8a49f58 Author: Lasse Collin Date: 2022-10-31 11:54:44 +0200 liblzma: Silence -Wconversion warning from crc64_fast.c. src/liblzma/check/crc64_fast.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) commit a243c617ff249d915ac123de4f536b80322c1fdb Author: Lasse Collin Date: 2022-10-31 11:49:47 +0200 CMake: Sync tuklib_cpucores.cmake with tuklib_cpucores.m4. This was forgotten from commit 2611c4d90535652d3eb7ef4a026a6691276fab43. 
cmake/tuklib_cpucores.cmake | 5 +++++ 1 file changed, 5 insertions(+) commit 05c72de06fcaaedc78f8abba7d5ec568ddcf1e75 Author: Lasse Collin Date: 2022-10-27 15:49:18 +0300 Tests: test_files.sh: Make it not fail if features were disabled at build. It now tries to test as many files as easily possible. The exit status indicates skipping if any of the files were skipped. This way it is easy to notice if something is being skipped when it isn't expected. tests/test_files.sh | 50 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 4 deletions(-) commit b3459327a51f4b8239d19e6c34b4e0c6bc2d81de Author: Lasse Collin Date: 2022-10-27 15:30:13 +0300 Tests: test_files.sh: Suppress an expected warning from the log. xz (but not xzdec) will normally warn about unsupported check but since we are testing specifically such a file, it's better to silence that warning so that it doesn't look suspicious in test_files.sh.log. The use of -q and -Q in xzdec is just for consistency and doesn't affect the result at least for now. tests/test_files.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 798c86e4231e0835ab76ccd0810c8ea30833b2ce Author: Lasse Collin Date: 2022-10-27 15:27:50 +0300 Tests: test_files.sh: Print the reason for skipping if xz & xzdec missing. tests/test_files.sh | 1 + 1 file changed, 1 insertion(+) commit c1dd8524e1af07f16b790463899de06a6a5fcc08 Author: Lasse Collin Date: 2022-10-27 01:12:40 +0300 Tests: Keep test_compress_* working when some filters are unavailable. tests/test_compress.sh | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) commit ce30ada91951d0746879ae438da11f1ee8a90aa0 Author: Jia Tan Date: 2022-10-23 21:01:08 +0800 Tests: test_bcj_exact_size skips properly now if PowerPC filter disabled. 
tests/test_bcj_exact_size.c | 3 +++ 1 file changed, 3 insertions(+) commit 89c5cfcacaca7130509fac836e2f30c46b824502 Author: Lasse Collin Date: 2022-10-26 00:05:57 +0300 Tests: Test also unsupported-*.xz. tests/test_files.sh | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) commit a4b214b93ac741edef9c41e55865b0b867ca2587 Author: Lasse Collin Date: 2022-10-25 23:45:03 +0300 Build: Use AC_CONFIG_HEADERS instead of the ancient AC_CONFIG_HEADER. We require Autoconf >= 2.69 and that has AC_CONFIG_HEADERS. There is a warning about AC_PROG_CC_C99 being obsolete but it cannot be removed because it is needed with Autoconf 2.69. configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 04f299b64e73f50afc188c2590ebebc6b73ed744 Author: Lasse Collin Date: 2022-10-25 23:31:44 +0300 Build: Update m4/ax_pthread.m4 from Autoconf Archive. m4/ax_pthread.m4 | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) commit 59c4d6e1390f6f4176f43ac1dad1f7ac03c449b8 Author: Lasse Collin Date: 2022-10-25 23:28:34 +0300 Build: Add configure option --disable-microlzma. MicroLZMA was made for EROFS and used by erofs-utils. It might be used by something else in the future but those wanting a smaller build for specific situations can now disable this rarely-needed feature. configure.ac | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) commit 054ccd6d14b2cc6eddc56897af280d3221414150 Author: Lasse Collin Date: 2022-10-25 23:09:11 +0300 xz: Fix --single-stream with an empty .xz Stream. Example: $ xz -dc --single-stream good-0-empty.xz xz: good-0-empty.xz: Internal error (bug) The code that tries to catch some input file issues early didn't anticipate LZMA_STREAM_END which is possible in that code only when --single-stream is used.
src/xz/coder.c | 9 +++++++++ 1 file changed, 9 insertions(+) commit 563288ea705e83ff5cb292adf794650c263bca1d Author: Lasse Collin Date: 2022-10-25 21:11:58 +0300 xz: Add support for OpenBSD's pledge() sandbox. configure.ac | 12 +++++++++--- src/xz/file_io.c | 11 +++++++++++ src/xz/main.c | 13 +++++++++++++ src/xz/private.h | 2 +- 4 files changed, 34 insertions(+), 4 deletions(-) commit f9913e8ee2ba0b1e4ff4d0aa4c001aae305ed944 Author: Lasse Collin Date: 2022-10-25 19:07:17 +0300 xz: Fix decompressor behavior if input uses an unsupported check type. Now files with unsupported check will make xz display a warning, set the exit status to 2 (unless --no-warn is used), and then decompress the file normally. This is how it was supposed to work since the beginning but this was broken by the commit 231c3c7098f1099a56abb8afece76fc9b8699f05, that is, a little before 5.0.0 was released. The buggy behavior displayed a message, set exit status 1 (error), and xz didn't attempt to decompress the file. This doesn't matter today except for special builds that disable CRC64 or SHA-256 at build time (but such builds should be used in special situations only). The bug matters if new check type is added in the future and an old xz version is used to decompress such a file; however, it's likely that such files would use a new filter too and an old xz wouldn't be able to decompress the file anyway. The first hunk in the commit is the actual fix. The second hunk is a cleanup since LZMA_TELL_ANY_CHECK isn't used in xz. There is a test file for unsupported check type but it wasn't used by test_files.sh, perhaps due to different behavior between xz and the simpler xzdec. src/xz/coder.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) commit aa4fe145b9486adc454f44fd3e09be9add808a0f Author: Lasse Collin Date: 2022-10-25 18:36:19 +0300 xz: Clarify the man page: input file isn't removed if an error occurs.
src/xz/xz.1 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) commit 8b46ae8cdeddfd7dc01fec92971b8696e9a96c5d Author: Lasse Collin Date: 2022-10-25 18:30:55 +0300 xz: Refactor to remove is_empty_filename(). Long ago it was used in list.c too but nowadays it's needed only in io_open_src() so it's nicer to avoid a separate function. src/xz/file_io.c | 4 +++- src/xz/util.c | 12 ------------ src/xz/util.h | 4 ---- 3 files changed, 3 insertions(+), 17 deletions(-) commit 85624015978b0de294cff3df79006df987c552b1 Author: Lasse Collin Date: 2022-10-25 18:23:54 +0300 xz: If input file cannot be removed, treat it as a warning, not error. Treating it as a warning (message + exit status 2) matches gzip and it seems more logical as at that point the output file has already been successfully closed. When it's a warning it is possible to suppress it with --no-warn. src/xz/file_io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit fda9f85f52c546f7ca0313cf89481da4707fecb3 Author: Lasse Collin Date: 2022-10-24 16:25:09 +0300 liblzma: Threaded decoder: Stop the worker threads on errors. It's waste of CPU time and electricity to leave the unfinished worker threads running when it is known that their output will get ignored. src/liblzma/common/stream_decoder_mt.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) commit 2611c4d90535652d3eb7ef4a026a6691276fab43 Author: Lasse Collin Date: 2022-10-20 20:22:50 +0300 tuklib_cpucores: Use HW_NCPUONLINE on OpenBSD. On OpenBSD the number of cores online is often less than what HW_NCPU would return because OpenBSD disables simultaneous multi-threading (SMT) by default. Thanks to Christian Weisgerber. m4/tuklib_cpucores.m4 | 5 +++++ src/common/tuklib_cpucores.c | 9 +++++++++ 2 files changed, 14 insertions(+) commit 424ac91c7e0419393ff2bde4f62e21fa611c776d Author: Lasse Collin Date: 2022-10-19 19:39:35 +0300 Tests: Skip tests in test_*.sh if encoders and/or decoders are disabled. 
This isn't perfect as the scripts can still fail if only certain filters are disabled. This is still an improvement as now "make check" has better behavior when all encoders or decoders are disabled. Grepping ../config.h is simple and fairly clean but it only works if config.h was created. CMake builds don't create config.h but they don't use these test scripts either. Thanks to Sebastian Andrzej Siewior for reporting the problem. Thanks to Jia Tan for the original patch which grepped xz error messages instead of config.h. tests/test_compress.sh | 12 ++++++++++++ tests/test_files.sh | 11 +++++++++++ tests/test_scripts.sh | 11 +++++++++++ 3 files changed, 34 insertions(+) commit ca8bf9d7c5a30be8ba1eeb106fd892f19e83ed09 Author: Lasse Collin Date: 2022-10-19 18:54:34 +0300 Test: Remove the (exit 1) lines. I suspect that I used these in the original version because Autoconf's manual describes that such a trick is needed in some specific situations for portability reasons. None of those situations listed on Autoconf 2.71 manual apply to these test scripts though so this cleans them up. tests/test_compress.sh | 10 ---------- tests/test_files.sh | 9 --------- tests/test_scripts.sh | 6 ------ 3 files changed, 25 deletions(-) commit 82fcb7cfc17ce62f79ebc7ca2374e1daca5e4d5e Author: Lasse Collin Date: 2022-10-19 17:14:57 +0300 Tests: Fix a warning in test_memlimit.c when decoders are disabled. tests/test_memlimit.c | 3 +++ 1 file changed, 3 insertions(+) commit b5f8271b45b9b0e59485ffba3640ca3418835ec4 Author: Lasse Collin Date: 2022-10-19 17:11:46 +0300 Tests: Add test_memlimit to .gitignore. Thanks to Jia Tan. .gitignore | 1 + 1 file changed, 1 insertion(+) commit 6a86e81cab202d0a812a7b2e9efacaf70c58ba38 Author: Jia Tan Date: 2022-10-06 21:53:09 +0300 Tests: Refactor test_stream_flags.c. Converts test_stream_flags to tuktest. Also the test will now compile and skip properly if encoders or decoders are disabled. Thanks to Sebastian Andrzej Siewior. 
tests/test_stream_flags.c | 533 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 416 insertions(+), 117 deletions(-) commit 827ac5b4821491fd3afe0d0e1ddac326253aeb66 Author: Jia Tan Date: 2022-10-06 17:00:38 +0800 Tests: Refactor test_block_header.c. test_block_header now achieves higher test coverage. Also the test will now compile and skip properly if encoders or decoders are disabled. Thanks to Sebastian Andrzej Siewior. tests/test_block_header.c | 486 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 370 insertions(+), 116 deletions(-) commit 84963318952064a93bfc52edd6b0ef70593384ee Author: Jia Tan Date: 2022-10-05 23:54:12 +0800 Tests: Fix compilation issues. test_bcj_exact_size, test_check, test_hardware, and test_index will all now compile and skip properly if encoders or decoders are disabled. Also fixed a small typo (disabed -> disabled). Thanks to Sebastian Andrzej Siewior. tests/test_bcj_exact_size.c | 20 ++++++++++++++------ tests/test_check.c | 8 +++++++- tests/test_hardware.c | 2 +- tests/test_index.c | 6 ++++++ tests/test_memlimit.c | 16 +++++++++++++++- tests/test_vli.c | 13 +++++++++++++ 6 files changed, 56 insertions(+), 9 deletions(-) commit 7dcabeec63d46b436fa5f043c3d1f09d0e15be16 Author: Lasse Collin Date: 2022-10-05 16:20:47 +0300 Tests: Include mythread.h in the tests that use MYTHREAD_ENABLED. tests/test_check.c | 1 + tests/test_hardware.c | 1 + tests/test_memlimit.c | 1 + 3 files changed, 3 insertions(+) commit 14af758a770c7781af18fb66d6d21ee5b1c27f04 Author: Jia Tan Date: 2022-10-05 20:57:16 +0800 liblzma: Fix a compilation issue when encoders are disabled. When encoders were disabled and threading enabled, outqueue.c and outqueue.h were not compiled. The multi threaded decoder required these files, so compilation failed. 
src/liblzma/common/Makefile.inc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) commit 6ca5c354bd4620aa7f81da68870eef1b1f26288f Author: Jia Tan Date: 2022-10-05 16:41:38 +0800 Tests: Fix compilation error when threading support has been disabled. Now tests that require threading are skipped when threading support has been disabled. Thanks to Sebastian Andrzej Siewior. tests/test_check.c | 4 ++++ tests/test_hardware.c | 4 ++++ tests/test_memlimit.c | 4 ++++ 3 files changed, 12 insertions(+) commit fae37ad2affd8fe8871f4ff93d5cab5ec14d5e58 Author: Lasse Collin Date: 2022-10-05 14:26:00 +0300 tuklib_integer: Add 64-bit endianness-converting reads and writes. Also update the comment in liblzma's memcmplen.h. Thanks to Michał Górny for the original patch for the reads. m4/tuklib_integer.m4 | 8 ++++---- src/common/tuklib_integer.h | 46 ++++++++++++++++++++++++++++++++++++++++-- src/liblzma/common/memcmplen.h | 9 +++------ 3 files changed, 51 insertions(+), 12 deletions(-) commit 508a44372c5b0dede8863fd0d358d4a9d8645c95 Author: Lasse Collin Date: 2022-09-30 12:06:13 +0300 liblzma: Add API doc note about the .xz decoder LZMA_MEMLIMIT_ERROR bug. The bug was fixed in 660739f99ab211edec4071de98889fb32ed04e98. src/liblzma/api/lzma/base.h | 11 +++++++++++ 1 file changed, 11 insertions(+) commit 8cc9874a7974cd575aee44f218836f7acdbeb0ed Author: Jia Tan Date: 2022-09-21 16:15:50 +0800 liblzma: Add dest and src NULL checks to lzma_index_cat. The documentation states LZMA_PROG_ERROR can be returned from lzma_index_cat. Previously, lzma_index_cat could not return LZMA_PROG_ERROR. Now, the validation is similar to lzma_index_append, which does a NULL check on the index parameter. src/liblzma/common/index.c | 3 +++ 1 file changed, 3 insertions(+) commit afd5a8bf5374eba82804a999e1ea7af680784086 Author: Jia Tan Date: 2022-09-21 20:29:28 +0800 Tests: Create a test for the lzma_index_cat bug. 
tests/test_index.c | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) commit 3d5a99ca373a4e86faf671226ca6487febb9eeac Author: Jia Tan Date: 2022-09-21 19:28:53 +0800 liblzma: Fix copying of check type statistics in lzma_index_cat(). The check type of the last Stream in dest was never copied to dest->checks (the code tried to copy it but it was done too late). This meant that the value returned by lzma_index_checks() would only include the check type of the last Stream when multiple lzma_indexes had been concatenated. In xz --list this meant that the summary would only list the check type of the last Stream, so in this sense this was only a visual bug. However, it's possible that some applications use this information for purposes other than merely showing it to the users in an informational message. I'm not aware of such applications though and it's quite possible that such applications don't exist. Regular streamed decompression in xz or any other application doesn't use lzma_index_cat() and so this bug cannot affect them. src/liblzma/common/index.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) commit a61d32172789735350a941e23baf6b587c49e5d9 Author: Lasse Collin Date: 2022-09-28 12:20:41 +0300 tuklib_physmem: Fix Unicode builds on Windows. Thanks to ArSaCiA Game. src/common/tuklib_physmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 5a4d3548ab214fdca364d5c734baf1d1fab47308 Author: Lasse Collin Date: 2022-09-28 11:12:07 +0300 Tests: Add test_memlimit.c to test restarting after LZMA_MEMLIMIT_ERROR. tests/Makefile.am | 2 + tests/test_memlimit.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 153 insertions(+) commit 660739f99ab211edec4071de98889fb32ed04e98 Author: Lasse Collin Date: 2022-09-28 11:05:15 +0300 liblzma: Stream decoder: Fix restarting after LZMA_MEMLIMIT_ERROR. 
If lzma_code() returns LZMA_MEMLIMIT_ERROR it is now possible to use lzma_memlimit_set() to increase the limit and continue decoding. This was supposed to work from the beginning but there was a bug. With other decoders (.lzma or threaded .xz) this already worked correctly. src/liblzma/common/stream_decoder.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) commit 7e68fda58c74ad9e5b876cc22fcbe80fc0e4747b Author: Lasse Collin Date: 2022-09-28 11:00:23 +0300 liblzma: Stream decoder: Fix comments. src/liblzma/common/stream_decoder.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) commit f664cb25841fc1c478b819034a224a558e2ac6e7 Author: Lasse Collin Date: 2022-09-20 16:58:22 +0300 liblzma: ARM64: Add comments. src/liblzma/simple/arm64.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) commit b557b4a0eea05470fae8ba5ef4ad5a6dfb36ac41 Author: Lasse Collin Date: 2022-09-20 16:27:50 +0300 liblzma: ARM64: Fix wrong comment in API doc. Thanks to Jia Tan. src/liblzma/api/lzma/bcj.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit d5b0906fa55157f48c200188a3951d80df9cb308 Author: Lasse Collin Date: 2022-09-19 20:24:26 +0300 xz: Add --experimental-arm64[=width=WIDTH]. It will be renamed to --arm64 once it is stable. Man page or --long-help weren't updated yet. src/xz/args.c | 7 +++++++ src/xz/message.c | 7 +++++++ src/xz/options.c | 39 +++++++++++++++++++++++++++++++++++++++ src/xz/options.h | 7 +++++++ 4 files changed, 60 insertions(+) commit ecb966de308c255bb4735a7307ef9901c643a9de Author: Lasse Collin Date: 2022-09-19 19:34:56 +0300 liblzma: Add experimental ARM64 BCJ filter with a temporary Filter ID. That is, the Filter ID will be changed once the design is final. The current version will be removed. So files created with the temporary Filter ID won't be supported in the future.
CMakeLists.txt | 3 + configure.ac | 4 +- src/liblzma/api/lzma/bcj.h | 35 +++++- src/liblzma/common/filter_common.c | 9 ++ src/liblzma/common/filter_decoder.c | 8 ++ src/liblzma/common/filter_encoder.c | 11 ++ src/liblzma/simple/Makefile.inc | 4 + src/liblzma/simple/arm64.c | 227 ++++++++++++++++++++++++++++++++++++ src/liblzma/simple/simple_coder.h | 9 ++ src/liblzma/simple/simple_decoder.h | 4 + src/liblzma/simple/simple_encoder.h | 2 + 11 files changed, 313 insertions(+), 3 deletions(-) commit 177bdc922cb17bd0fd831ab8139dfae912a5c2b8 Author: Lasse Collin Date: 2022-09-17 22:42:18 +0300 liblzma: Simple/BCJ filters: Allow disabling generic BCJ options. This will be needed for the ARM64 BCJ filter as it will use its own options struct. src/liblzma/simple/arm.c | 2 +- src/liblzma/simple/armthumb.c | 2 +- src/liblzma/simple/ia64.c | 2 +- src/liblzma/simple/powerpc.c | 2 +- src/liblzma/simple/simple_coder.c | 4 ++-- src/liblzma/simple/simple_private.h | 2 +- src/liblzma/simple/sparc.c | 2 +- src/liblzma/simple/x86.c | 3 ++- 8 files changed, 10 insertions(+), 9 deletions(-) commit c3592d0a55114144686ecf960cb516d6b31c98e9 Author: Lasse Collin Date: 2022-09-16 17:08:53 +0300 Tests: Add a test file for lzma_index_append() integer overflow bug. This test fails before commit 18d7facd3802b55c287581405c4d49c98708c136. test_files.sh now runs xz -l for bad-3-index-uncomp-overflow.xz because only then the previously-buggy code path gets tested. Normal decompression doesn't use lzma_index_append() at all. Instead, lzma_index_hash functions are used and those already did the overflow check. tests/files/README | 10 ++++++++++ tests/files/bad-3-index-uncomp-overflow.xz | Bin 0 -> 132 bytes tests/test_files.sh | 8 ++++++++ 3 files changed, 18 insertions(+) commit 982b29f828079a2a26253a40e975127a40a7d2bd Author: Lasse Collin Date: 2022-09-16 15:10:07 +0300 Translations: Add Turkish translation. 
po/LINGUAS | 1 + po/tr.po | 977 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 978 insertions(+) commit 1fc088d9f6d1697924aaeac8cd1fb9918d1532e2 Author: Lasse Collin Date: 2022-09-16 14:09:07 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 097c7b67ce86ff16a7cef7631b39e5ca4ee3d420 Author: Lasse Collin Date: 2022-09-16 14:07:03 +0300 xzgrep: Fix compatibility with old shells. Running the current xzgrep on Slackware 10.1 with GNU bash 3.00.15: xzgrep: line 231: syntax error near unexpected token `;;' On SCO OpenServer 5.0.7 with Korn Shell 93r: syntax error at line 231 : `;;' unexpected Turns out that some old shells don't like apostrophes (') inside command substitutions. For example, the following fails: x=$(echo foo # asdf'zxcv echo bar) printf '%s\n' "$x" The problem was introduced by commits 69d1b3fc29677af8ade8dc15dba83f0589cb63d6 (2022-03-29), bd7b290f3fe4faeceb7d3497ed9bf2e6ed5e7dc5 (2022-07-18), and a648978b20495b7aa4a8b029c5a810b5ad9d08ff (2022-07-19). 5.2.6 is the only stable release that included this problem. Thanks to Kevin R. Bulgrien for reporting the problem on SCO OpenServer 5.0.7 and for providing the fix. src/scripts/xzgrep.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit f2d084fe3f0d6d71488bfc6987f26542f67bfd99 Author: Lasse Collin Date: 2022-09-09 14:12:30 +0300 Tests: Silence warnings about unused functions from tuktest.h. Warnings about unused tuktest_run_test conveniently tell which test programs haven't been converted to tuktest.h yet but I silenced that warning too for now anyway. It is fine to use __attribute__((__unused__)) even when the function is actually used because the attribute only means that the function might be unused. tests/tuktest.h | 10 ++++++++++ 1 file changed, 10 insertions(+) commit f8ee61e74eb40600445fdb601c374d582e1e9c8a Author: Lasse Collin Date: 2022-09-09 13:51:57 +0300 liblzma: lzma_filters_copy: Keep dest[] unmodified if an error occurs. 
lzma_stream_encoder() and lzma_stream_encoder_mt() always assumed this. Before this patch, failing lzma_filters_copy() could result in free(invalid_pointer) or invalid memory reads in stream_encoder.c or stream_encoder_mt.c. To trigger this, allocating memory for a filter options structure has to fail. These are tiny allocations so in practice they very rarely fail. Certain badness in the filter chain array could also make lzma_filters_copy() fail but both stream_encoder.c and stream_encoder_mt.c validate the filter chain before trying to copy it, so the crash cannot occur this way. src/liblzma/api/lzma/filter.h | 4 +++- src/liblzma/common/filter_common.c | 18 ++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) commit 18d7facd3802b55c287581405c4d49c98708c136 Author: Jia Tan Date: 2022-09-02 20:18:55 +0800 liblzma: lzma_index_append: Add missing integer overflow check. The documentation in src/liblzma/api/lzma/index.h suggests that both the unpadded (compressed) size and the uncompressed size are checked for overflow, but only the unpadded size was checked. The uncompressed check is done first since that is more likely to occur than the unpadded or index field size overflows. src/liblzma/common/index.c | 4 ++++ 1 file changed, 4 insertions(+) commit 9ac06cb5b85274d18f9f70d82cf2d8c9c1151bd4 Author: Lasse Collin Date: 2022-09-08 15:11:08 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit ba3e4ba2de034ae93a513f9c3a0823b80cdb66dc Author: Jia Tan Date: 2022-09-08 15:07:00 +0300 CMake: Clarify a comment about Windows symlinks without file extension. CMakeLists.txt | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) commit 17485e884ce5c74315f29a8a1507bc706cd5cd1d Author: Lasse Collin Date: 2022-09-08 15:02:41 +0300 CMake: Update for liblzma_*.map files and fix wrong common_w32res.rc dep. The previous commit split liblzma.map into liblzma_linux.map and liblzma_generic.map. This commit updates the CMake build for those. 
common_w32res.rc dependency was listed under Linux/FreeBSD while obviously it belongs to Windows when building a DLL. CMakeLists.txt | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) commit 913ddc5572b9455fa0cf299be2e35c708840e922 Author: Lasse Collin Date: 2022-09-04 23:23:00 +0300 liblzma: Vaccinate against an ill patch from RHEL/CentOS 7. RHEL/CentOS 7 shipped with 5.1.2alpha, including the threaded encoder that is behind #ifdef LZMA_UNSTABLE in the API headers. In 5.1.2alpha these symbols are under XZ_5.1.2alpha in liblzma.map. API/ABI compatibility tracking isn't done between development releases so newer releases didn't have XZ_5.1.2alpha anymore. Later RHEL/CentOS 7 updated xz to 5.2.2 but they wanted to keep the exported symbols compatible with 5.1.2alpha. After checking the ABI changes it turned out that >= 5.2.0 ABI is backward compatible with the threaded encoder functions from 5.1.2alpha (but not vice versa as fixes and extensions to these functions were made between 5.1.2alpha and 5.2.0). In RHEL/CentOS 7, XZ Utils 5.2.2 was patched with xz-5.2.2-compat-libs.patch to modify liblzma.map: - XZ_5.1.2alpha was added with lzma_stream_encoder_mt and lzma_stream_encoder_mt_memusage. This matched XZ Utils 5.1.2alpha. - XZ_5.2 was replaced with XZ_5.2.2. It is clear that this was an error; the intention was to keep using XZ_5.2 (XZ_5.2.2 has never been used in XZ Utils). So XZ_5.2.2 lists all symbols that were listed under XZ_5.2 before the patch. lzma_stream_encoder_mt and _mt_memusage are included too so they are listed both here and under XZ_5.1.2alpha. The patch didn't add any __asm__(".symver ...") lines to the .c files. Thus the resulting liblzma.so exports the threaded encoder functions under XZ_5.1.2alpha only. Listing the two functions also under XZ_5.2.2 in liblzma.map has no effect without matching .symver lines. 
The lack of XZ_5.2 in RHEL/CentOS 7 means that binaries linked against unpatched XZ Utils 5.2.x won't run on RHEL/CentOS 7. This is unfortunate but this alone isn't too bad as the problem is contained within RHEL/CentOS 7 and doesn't affect users of other distributions. It could also be fixed internally in RHEL/CentOS 7. The second problem is more serious: In XZ Utils 5.2.2 the API headers don't have #ifdef LZMA_UNSTABLE for obvious reasons. This is true in RHEL/CentOS 7 version too. Thus now programs using new APIs can be compiled without an extra #define. However, the programs end up depending on symbol version XZ_5.1.2alpha (and possibly also XZ_5.2.2) instead of XZ_5.2 as they would with an unpatched XZ Utils 5.2.2. This means that such binaries won't run on other distributions shipping XZ Utils >= 5.2.0 as they don't provide XZ_5.1.2alpha or XZ_5.2.2; they only provide XZ_5.2 (and XZ_5.0). (This includes RHEL/CentOS 8 as the patch luckily isn't included there anymore with XZ Utils 5.2.4.) Binaries built by RHEL/CentOS 7 users get distributed and then people wonder why they don't run on some other distribution. Seems that people have found out about the patch and been copying it to some build scripts, seemingly curing the symptoms but actually spreading the illness further and outside RHEL/CentOS 7. The ill patch seems to be from late 2016 (RHEL 7.3) and in 2017 it had spread at least to EasyBuild. I heard about the events only recently. :-( This commit splits liblzma.map into two versions: one for GNU/Linux and another for other OSes that can use symbol versioning (FreeBSD, Solaris, maybe others). The Linux-specific file and the matching additions to .c files add full compatibility with binaries that have been built against a RHEL/CentOS-patched liblzma. Builds for OSes other than GNU/Linux won't get the vaccine as they should be immune to the problem (I really hope that no build script uses the RHEL/CentOS 7 patch outside GNU/Linux). 
The RHEL/CentOS compatibility symbols XZ_5.1.2alpha and XZ_5.2.2 are intentionally put *after* XZ_5.2 in liblzma_linux.map. This way if one forgets to #define HAVE_SYMBOL_VERSIONS_LINUX when building, the resulting liblzma.so.5 will have lzma_stream_encoder_mt@@XZ_5.2 since XZ_5.2 {...} is the first one that lists that function. Without HAVE_SYMBOL_VERSIONS_LINUX @XZ_5.1.2alpha and @XZ_5.2.2 will be missing but that's still a minor problem compared to only having lzma_stream_encoder_mt@@XZ_5.1.2alpha! The "local: *;" line was moved to XZ_5.0 so that it doesn't need to be moved around. It doesn't matter where it is put. Having two similar liblzma_*.map files is a bit silly as it is, at least for now, easily possible to generate the generic one from the Linux-specific file. But that adds extra steps and increases the risk of mistakes when supporting more than one build system. So I rather maintain two files in parallel and let validate_map.sh check that they are in sync when "make mydist" is run. This adds .symver lines for lzma_stream_encoder_mt@XZ_5.2.2 and lzma_stream_encoder_mt_memusage@XZ_5.2.2 even though these weren't exported by RHEL/CentOS 7 (only @@XZ_5.1.2alpha was for these two). I added these anyway because someone might misunderstand the RHEL/CentOS 7 patch and think that @XZ_5.2.2 (@@XZ_5.2.2) versions were exported too. At glance one could suggest using __typeof__ to copy the function prototypes when making aliases. However, this doesn't work trivially because __typeof__ won't copy attributes (lzma_nothrow, lzma_pure) and it won't change symbol visibility from hidden to default (done by LZMA_API()). Attributes could be copied with __copy__ attribute but that needs GCC 9 and a fallback method would be needed anyway. This uses __symver__ attribute with GCC >= 10 and __asm__(".symver ...") with everything else. The attribute method is required for LTO (-flto) support with GCC. 
Using -flto with GCC older than 10 is now broken on GNU/Linux and will not be fixed (can silently result in a broken liblzma build that has dangerously incorrect symbol versions). LTO builds with Clang seem to work with the traditional __asm__(".symver ...") method. Thanks to Boud Roukema for reporting the problem and discussing the details and testing the fix. configure.ac | 23 +++- src/liblzma/Makefile.am | 10 +- src/liblzma/common/block_buffer_encoder.c | 18 ++++ src/liblzma/common/common.c | 14 +++ src/liblzma/common/common.h | 28 +++++ src/liblzma/common/hardware_cputhreads.c | 12 +++ src/liblzma/common/stream_encoder_mt.c | 42 ++++++++ src/liblzma/{liblzma.map => liblzma_generic.map} | 6 +- src/liblzma/liblzma_linux.map | 131 +++++++++++++++++++++++ src/liblzma/validate_map.sh | 113 +++++++++++++++++-- 10 files changed, 382 insertions(+), 15 deletions(-) commit 80a1a8bb838842a2be343bd88ad1462c21c5e2c9 Author: Lasse Collin Date: 2022-08-31 16:42:04 +0300 CMake: Add xz symlinks. These are a minor thing especially since the xz build has some real problems still like lack of large file support on 32-bit systems but I'll commit this since the code exists. Thanks to Jia Tan. CMakeLists.txt | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) commit a4193bb6d85d7765b1b87faeab3e50106d3ab2e9 Author: Lasse Collin Date: 2022-08-31 16:29:38 +0300 CMake: Put xz man page install under if(UNIX) like is for xzdec. Thanks to Jia Tan. CMakeLists.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) commit c1555b1a221a1427d4d650647531016d754bc4da Author: Lasse Collin Date: 2022-08-22 18:16:40 +0300 Bump version number for 5.3.3alpha. src/liblzma/api/lzma/version.h | 2 +- src/liblzma/liblzma.map | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 44fedddc11c3f6ec2f7fe35a3e38f15ca93f90eb Author: Lasse Collin Date: 2022-08-22 18:13:56 +0300 Add NEWS for 5.3.3alpha. 
NEWS | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) commit a93e235d7ca764cc19f8f9f9063b40ff361c3cfa Author: Lasse Collin Date: 2022-07-12 17:59:41 +0300 Translations: Add Portuguese translation. Jia Tan made white-space changes and also changed "Language: pt_BR\n" to pt. The translator wasn't reached so I'm hoping these changes are OK and will commit it without translator's approval. Thanks to Pedro Albuquerque and Jia Tan. po/LINGUAS | 1 + po/pt.po | 1001 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1002 insertions(+) commit e7cf5a946f25e40d77f45e41f0dee7d42a04e9ae Author: Lasse Collin Date: 2022-07-10 21:16:40 +0300 Translations: Add Serbian translation. Quite a few white-space changes were made by Jia Tan to make this look good. Contacting the translator didn't succeed so I'm committing this without getting translator's approval. Thanks to Мирослав Николић (Miroslav Nikolic) and Jia Tan. po/LINGUAS | 1 + po/sr.po | 987 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 988 insertions(+) commit cc8617ab53b1f2a4da212fa76c92fe538269c5de Author: Lasse Collin Date: 2022-07-04 23:51:36 +0300 Translations: Add Swedish translation. Thanks to Sebastian Rasmussen and Jia Tan. po/LINGUAS | 1 + po/sv.po | 983 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 984 insertions(+) commit c613598c358b640682d0ca2aed38fa7df763e8c7 Author: Lasse Collin Date: 2022-07-04 23:40:27 +0300 Translations: Add Esperanto translation. Thanks to Keith Bowes and Jia Tan. po/LINGUAS | 1 + po/eo.po | 984 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 985 insertions(+) commit 659a587d678f21e98e91d2751c31d4ce050c081a Author: Lasse Collin Date: 2022-07-01 00:22:33 +0300 Translations: Add Catalan translation. Thanks to Jordi Mas and Jia Tan. 
po/LINGUAS | 1 + po/ca.po | 1076 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1077 insertions(+) commit 739fd8c9bdf1d85f57d56642aad87148d4779530 Author: Lasse Collin Date: 2022-06-30 17:47:08 +0300 Translations: Add Ukrainian translation. Thanks to Yuri Chornoivan and Jia Tan. po/LINGUAS | 1 + po/uk.po | 996 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 997 insertions(+) commit 73280550b111930c62a667e56add8fd574f80bc8 Author: Lasse Collin Date: 2022-06-30 17:45:26 +0300 Translators: Add Romanian translation. Thanks to Remus-Gabriel Chelu and Jia Tan. po/LINGUAS | 1 + po/ro.po | 1016 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1017 insertions(+) commit 2465f5b825152714b2c357d96c2422c31109d320 Author: Lasse Collin Date: 2022-06-29 18:33:32 +0300 Translations: Update Brazilian Portuguese translation. One msgstr was changed. The diff is long due to changes in the source code line numbers in the comments. Thanks to Rafael Fontenelle. po/pt_BR.po | 186 ++++++++++++++++++++++++++++++------------------------------ 1 file changed, 92 insertions(+), 94 deletions(-) commit 434e1ffd3e62719d509da10b646216f5ef53fe4d Author: Lasse Collin Date: 2022-06-29 18:04:44 +0300 Translations: Add Croatian translation. Thanks to Božidar Putanec and Jia Tan. po/LINGUAS | 1 + po/hr.po | 987 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 988 insertions(+) commit 0732d0f7065c9bd48bfe4f5983144ae970c4a499 Author: Lasse Collin Date: 2022-06-29 17:58:48 +0300 Translations: Add Spanish translation. Thanks to Cristian Othón Martínez Vera and Jia Tan. po/LINGUAS | 1 + po/es.po | 984 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 985 insertions(+) commit 9899b0f82bc130998d1f1f618a6ab805b73f2696 Author: Lasse Collin Date: 2022-06-29 17:49:43 +0300 Translations: Add Korean translation. Thanks to Seong-ho Cho and Jia Tan. 
po/LINGUAS | 1 + po/ko.po | 972 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 973 insertions(+) commit 65217eaf6bd195f3ef027d4ac55d57b7d133d69f Author: Lasse Collin Date: 2020-03-16 21:57:21 +0200 Translations: Rebuild cs.po to avoid incorrect fuzzy strings. "make dist" updates the .po files and the fuzzy strings would result in multiple very wrong translations. po/cs.po | 592 ++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 322 insertions(+), 270 deletions(-) commit e56ff423ee7af9e648e16b132f0d835d2cb4db26 Author: Lasse Collin Date: 2020-03-16 17:30:39 +0200 Translations: Add partial Danish translation. I made a few minor white space changes without getting them approved by the Danish translation team. po/LINGUAS | 1 + po/da.po | 896 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 897 insertions(+) commit 43e09c62e77cb8807d932c81de4decbdb956e184 Author: Lasse Collin Date: 2020-03-11 22:37:54 +0200 Translations: Add hu, zh_CN, and zh_TW. I made a few white space changes to these without getting them approved by the translation teams. (I tried to contact the hu and zh_TW teams but didn't succeed. I didn't contact the zh_CN team.) po/LINGUAS | 3 + po/hu.po | 985 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ po/zh_CN.po | 963 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ po/zh_TW.po | 956 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 2907 insertions(+) commit 982b6b198ae1ffe6093236dd8a3d03d1415b912b Author: Lasse Collin Date: 2020-03-11 14:33:30 +0200 Translations: Update vi.po to match the file from the TP. The translated strings haven't been updated but word wrapping is different. 
po/vi.po | 407 ++++++++++++++++++++++++++++----------------------------------- 1 file changed, 179 insertions(+), 228 deletions(-) commit 801f39691fc4abc6dd33d7653d498781b593f3eb Author: Lasse Collin Date: 2020-03-11 14:18:03 +0200 Translations: Add fi and pt_BR, and update de, fr, it, and pl. The German translation isn't identical to the file in the Translation Project but the changes (white space changes only) were approved by the translator Mario Blättermann. po/LINGUAS | 2 + po/de.po | 476 ++++++++++++++-------------- po/fi.po | 974 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ po/fr.po | 272 ++++++++-------- po/it.po | 479 ++++++++++++---------------- po/pl.po | 239 +++++++------- po/pt_BR.po | 1001 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 2697 insertions(+), 746 deletions(-) commit 311e4f85ede5d2f0bb71f3ad70b5b7db1b5adf33 Author: Lasse Collin Date: 2022-08-22 17:27:19 +0300 xz: Try to clarify --memlimit-mt-decompress vs. --memlimit-compress. src/xz/xz.1 | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) commit df23c31000283c00e5ef1ca32a0bc3bb757bd707 Author: Lasse Collin Date: 2022-08-22 16:46:18 +0300 CMake: Add liblzma tests. Thanks to Jia Tan for the patch. CMakeLists.txt | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) commit 02a777f9c422d3b5ec895078530bb1a2b6f7bdf5 Author: Lasse Collin Date: 2022-08-19 23:32:22 +0300 xz: Revise --info-memory output. The strings could be more descriptive but it's good to have some version of this committed now. --robot mode wasn't changed yet. src/xz/hardware.c | 32 ++++++++++++++++++++++++++------ src/xz/xz.1 | 1 + 2 files changed, 27 insertions(+), 6 deletions(-) commit f864f6d42eab57ea8ed82cc2dd19a03b51377442 Author: Lasse Collin Date: 2022-08-19 23:12:02 +0300 xz: Update the man page for threaded decompression and memlimits. 
This documents the changes made in commits 6c6da57ae2aa962aabde6892442227063d87e88c, cad299008cf73ec566f0662a9cf2b94f86a99659, and 898faa97287a756231c663a3ed5165672b417207. The --info-memory bit hasn't been finished yet even though it's already mentioned in this commit under --memlimit-mt-decompress and --threads. src/xz/xz.1 | 148 +++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 121 insertions(+), 27 deletions(-) commit d13bfcc0056617dd648f655a01653932fad7067f Author: Lasse Collin Date: 2022-08-18 17:49:16 +0300 Build: Include the CMake files in the distribution. This was supposed to be done in 2020 with 5.2.5 release already but it was noticed only today. 5.2.5 and 5.2.6 even mention experiemental CMake support in the NEWS entries. Thanks to Olivier B. for reporting the problem. Makefile.am | 2 ++ 1 file changed, 2 insertions(+) commit e66787bcfefdb93f19c974f895f65969a77937b0 Author: Lasse Collin Date: 2022-08-18 17:38:05 +0300 Windows: Fix broken liblzma.dll build with Visual Studio project files. The bug was introduced in 352ba2d69af2136bc814aa1df1a132559d445616 "Windows: Fix building of resource files when config.h isn't used." That commit fixed liblzma.dll build with CMake while keeping it working with Autotools on Windows but the VS project files were forgotten. I haven't tested these changes. Thanks to Olivier B. for reporting the bug and for the initial patch. windows/vs2013/liblzma_dll.vcxproj | 6 ++++++ windows/vs2017/liblzma_dll.vcxproj | 6 ++++++ windows/vs2019/liblzma_dll.vcxproj | 6 ++++++ 3 files changed, 18 insertions(+) commit c4e8e5fb311225b8b48d34157891a640b2535e0c Author: Lasse Collin Date: 2022-08-18 17:16:49 +0300 liblzma: Threaded decoder: Improve LZMA_FAIL_FAST when LZMA_FINISH is used. It will now return LZMA_DATA_ERROR (not LZMA_OK or LZMA_BUF_ERROR) if LZMA_FINISH is used and there isn't enough input to finish decoding the Block Header or the Block. 
The use of LZMA_DATA_ERROR is simpler and less risky than LZMA_BUF_ERROR but this might be changed before 5.4.0.
configure.ac | 4 ++++ 1 file changed, 4 insertions(+) commit 9cc721af5436908f2d5a828aebbc4050a32a3487 Author: Lasse Collin Date: 2022-07-24 13:27:48 +0300 xz: Update the man page that change to --keep will be in 5.2.6. src/xz/xz.1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit b81bf0c7d1873e52a4086a9abb494471d652cb55 Author: Lasse Collin Date: 2022-07-19 23:23:54 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 340cf1ec3927767046b8293a49da3db4e393f426 Author: Nicholas Jackson Date: 2022-07-17 17:39:23 -0700 CMake: Add missing source file to liblzma build CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) commit d796b6d7fdb8b7238b277056cf9146cce25db604 Author: Lasse Collin Date: 2022-07-19 23:19:49 +0300 xzgrep man page: Document exit statuses. src/scripts/xzgrep.1 | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) commit 923bf96b55e5216a6c8df9d8331934f54784390e Author: Lasse Collin Date: 2022-07-19 23:13:24 +0300 xzgrep: Improve error handling, especially signals. xzgrep wouldn't exit on SIGPIPE or SIGQUIT when it clearly should have. It's quite possible that it's not perfect still but at least it's much better. If multiple exit statuses compete, now it tries to pick the largest of value. Some comments were added. The exit status handling of signals is still broken if the shell uses values larger than 255 in $? to indicate that a process died due to a signal ***and*** their "exit" command doesn't take this into account. This seems to work well with the ksh and yash versions I tried. However, there is a report in gzip/zgrep that OpenSolaris 5.11 (not 5.10) has a problem with "exit" truncating the argument to 8 bits: https://debbugs.gnu.org/cgi/bugreport.cgi?bug=22900#25 Such a bug would break xzgrep but I didn't add a workaround at least for now. 5.11 is old and I don't know if the problem exists in modern descendants, or if the problem exists in other ksh implementations in use. 
src/scripts/xzgrep.in | 72 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 19 deletions(-) commit a648978b20495b7aa4a8b029c5a810b5ad9d08ff Author: Lasse Collin Date: 2022-07-19 00:10:55 +0300 xzgrep: Make the fix for ZDI-CAN-16587 more robust. I don't know if this can make a difference in the real world but it looked kind of suspicious (what happens with sed implementations that cannot process very long lines?). At least this commit shouldn't make it worse. src/scripts/xzgrep.in | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) commit bd7b290f3fe4faeceb7d3497ed9bf2e6ed5e7dc5 Author: Lasse Collin Date: 2022-07-18 21:52:31 +0300 xzgrep: Use grep -H --label when available (GNU, *BSDs). It avoids the use of sed for prefixing filenames to output lines. Using sed for that is slower and prone to security bugs so now the sed method is only used as a fallback. This also fixes an actual bug: When grepping a binary file, GNU grep nowadays prints its diagnostics to stderr instead of stdout and thus the sed-method for prefixing the filename doesn't work. So with this commit grepping binary files gives reasonable output with GNU grep now. This was inspired by zgrep but the implementation is different. src/scripts/xzgrep.in | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) commit b56729af9f1a596e57aeefd7570d8d7dce5c9f52 Author: Lasse Collin Date: 2022-07-18 21:10:25 +0300 xzgrep: Use -e to specify the pattern to grep. Now we don't need the separate test for adding the -q option as it can be added directly in the two places where it's needed. src/scripts/xzgrep.in | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) commit bad61b5997e6647911974022bfb72f3d4818a594 Author: Lasse Collin Date: 2022-07-18 19:18:48 +0300 Scripts: Use printf instead of echo in a few places. It's a good habbit as echo has some portability corner cases when the string contents can be anything. 
src/scripts/xzdiff.in | 6 +++--- src/scripts/xzgrep.in | 4 ++-- src/scripts/xzless.in | 4 ++-- src/scripts/xzmore.in | 8 ++++---- 4 files changed, 11 insertions(+), 11 deletions(-) commit 6a4a4a7d2667837dc824c26fcb19ed6ca5aff645 Author: Lasse Collin Date: 2022-07-17 21:36:25 +0300 xzgrep: Add more LC_ALL=C to avoid bugs with multibyte characters. Also replace one use of expr with printf. The rationale for LC_ALL=C was already mentioned in 69d1b3fc29677af8ade8dc15dba83f0589cb63d6 that fixed a security issue. However, unrelated uses weren't changed in that commit yet. POSIX says that with sed and such tools one should use LC_ALL=C to ensure predictable behavior when strings contain byte sequences that aren't valid multibyte characters in the current locale. See under "Application usage" in here: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html With GNU sed invalid multibyte strings would work without this; it's documented in its Texinfo manual. Some other implementations aren't so forgiving. src/scripts/xzgrep.in | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) commit b48f9d615f2c2e8d2f6e253d0e48ee66d0652b68 Author: Lasse Collin Date: 2022-07-17 20:55:16 +0300 xzgrep: Fix parsing of certain options. Fix handling of "xzgrep -25 foo" (in GNU grep "grep -25 foo" is an alias for "grep -C25 foo"). xzgrep would treat "foo" as filename instead of as a pattern. This bug was fixed in zgrep in gzip in 2012. Add -E, -F, -G, and -P to the "no argument required" list. Add -X to "argument required" list. It is an intentionally-undocumented GNU grep option so this isn't an important option for xzgrep but it seems that other grep implementations (well, those that I checked) don't support -X so I hope this change is an improvement still. grep -d (grep --directories=ACTION) requires an argument. In contrast to zgrep, I kept -d in the "no argument required" list because it's not supported in xzgrep (or zgrep). 
This way "xzgrep -d" gives an error about option being unsupported instead of telling that it requires an argument. Both zgrep and xzgrep tell that it's unsupported if an argument is specified. Add comments. src/scripts/xzgrep.in | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) commit 2d2201bc6315deda4d43625aa510972467bd51d4 Author: Lasse Collin Date: 2022-07-14 20:33:05 +0300 Tests: Add the .lzma files to test_files.sh. tests/test_files.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) commit ce5549a591bf126300618879f5b24023351baff6 Author: Lasse Collin Date: 2022-07-14 19:37:42 +0300 Tests: Add .lzma test files. tests/files/README | 63 ++++++++++++++++----- tests/files/bad-too_big_size-with_eopm.lzma | Bin 0 -> 37 bytes tests/files/bad-too_small_size-without_eopm-1.lzma | Bin 0 -> 31 bytes tests/files/bad-too_small_size-without_eopm-2.lzma | Bin 0 -> 31 bytes tests/files/bad-too_small_size-without_eopm-3.lzma | Bin 0 -> 36 bytes tests/files/bad-unknown_size-without_eopm.lzma | Bin 0 -> 31 bytes tests/files/good-known_size-with_eopm.lzma | Bin 0 -> 37 bytes tests/files/good-known_size-without_eopm.lzma | Bin 0 -> 31 bytes tests/files/good-unknown_size-with_eopm.lzma | Bin 0 -> 37 bytes 9 files changed, 50 insertions(+), 13 deletions(-) commit 107c93ee5cad51a6ea0cee471209bfe8d76deaa3 Author: Lasse Collin Date: 2022-07-14 18:12:38 +0300 liblzma: Rename a variable and improve a comment. src/liblzma/lzma/lzma_decoder.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) commit 511feb5eadb988d641b025d597f4fac7502003b8 Author: Lasse Collin Date: 2022-07-13 22:24:41 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 9595a3119b9faf0ce01375329cad8bbf85c35ea2 Author: Lasse Collin Date: 2022-07-13 22:24:07 +0300 liblzma: Add optional autodetection of LZMA end marker. 
Turns out that this is needed for .lzma files as the spec in LZMA SDK says that end marker may be present even if the size is stored in the header. Such files are rare but exist in the real world. The code in liblzma is so old that the spec didn't exist in LZMA SDK back then and I had understood that such files weren't possible (the lzma tool in LZMA SDK didn't create such files). This modifies the internal API so that LZMA decoder can be told if EOPM is allowed even when the uncompressed size is known. It's allowed with .lzma and not with other uses. Thanks to Karl Beldan for reporting the problem. doc/lzma-file-format.txt | 11 +++- src/liblzma/common/alone_decoder.c | 2 +- src/liblzma/common/microlzma_decoder.c | 2 +- src/liblzma/lz/lz_decoder.c | 10 +++- src/liblzma/lz/lz_decoder.h | 8 +-- src/liblzma/lzma/lzma2_decoder.c | 2 +- src/liblzma/lzma/lzma_decoder.c | 99 ++++++++++++++++++++++++---------- 7 files changed, 95 insertions(+), 39 deletions(-) commit 0c0f8e9761eb6eaf199082cf144db7ac5f9d8cb2 Author: Lasse Collin Date: 2022-07-12 18:53:04 +0300 xz: Document the special memlimit case of 2000 MiB on MIPS32. See commit fc3d3a7296ef58bb799a73943636b8bfd95339f7. src/xz/xz.1 | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) commit d1bfa3dc703325ecd974167e864a8712fdfe936e Author: Jia Tan Date: 2022-07-01 21:19:26 +0800 Created script to generate code coverage reports. The script uses lcov and genhtml after running the tests to show the code coverage statistics. The script will create a coverage directory where it is run. It can be run both in and out of the source directory. .gitignore | 4 +++ tests/code_coverage.sh | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) commit 86a30b0255d8064169fabfd213d907016d2f9f2a Author: Jia Tan Date: 2022-06-16 17:32:19 +0300 Tests: Add more tests into test_check. 
tests/test_check.c | 279 +++++++++++++++++++++++++++++++++++++++++++++++++++-- tests/tests.h | 23 +++++ 2 files changed, 295 insertions(+), 7 deletions(-) commit 82e30fed66a89706388a8c15dc954d84e63f38fa Author: Lasse Collin Date: 2022-06-16 15:02:57 +0300 Tests: Use char[][24] array for enum_strings_lzma_ret. Array of pointers to short strings is a bit pointless here and now it's fully const. tests/tests.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 5ba9459e6c4a29f6870ca78ce8ac6e519d59c41e Author: Lasse Collin Date: 2022-06-16 14:12:14 +0300 Tests: tuktest.h: Add tuktest_error_impl to help with error conditions. tests/tuktest.h | 72 +++++++++++++++++++++++++-------------------------------- 1 file changed, 32 insertions(+), 40 deletions(-) commit b339892668da20aea22a93668c82b87a38e4a97f Author: Lasse Collin Date: 2022-06-16 13:29:59 +0300 Tests: tuktest.h: Rename file_from_* and use tuktest_malloc there. tests/test_bcj_exact_size.c | 4 +--- tests/tuktest.h | 52 +++++++++++++++++++++------------------------ 2 files changed, 25 insertions(+), 31 deletions(-) commit d8b63a0ad68d1c461eb373466679ebc41fbc207d Author: Lasse Collin Date: 2022-06-16 13:08:19 +0300 Tests: tuktest.h: Add malloc wrapper with automatic freeing. tests/tuktest.h | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) commit 1d51536a4b19a8fae768f8eb462fc2238cb36d53 Author: Lasse Collin Date: 2022-06-16 11:47:37 +0300 Tests: tuktest.h: Move a function. tests/tuktest.h | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) commit 70c7555f6403553ee35539e869de0025592d8564 Author: Lasse Collin Date: 2022-06-14 22:21:15 +0300 Tests: test_vli: Remove an invalid test-assertion. lzma_vli is unsigned so trying a signed value results in a compiler warning from -Wsign-conversion. (lzma_vli)-1 equals to LZMA_VLI_UNKNOWN anyway which is the next assertion. 
tests/test_vli.c | 2 -- 1 file changed, 2 deletions(-) commit 154b73c5a1092c3f785e01666b564ad7ff1be555 Author: Lasse Collin Date: 2022-06-14 22:10:10 +0300 Tests: test_vli: Add const where appropriate. tests/test_vli.c | 53 ++++++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 25 deletions(-) commit 0354d6cce3ff98ea6f927107baf216253f6ce2bb Author: Jia Tan Date: 2022-06-13 20:27:03 +0800 Added vli tests to .gitignore .gitignore | 1 + 1 file changed, 1 insertion(+) commit a08f5ccf6bdc20ef70e41f6f3321618ef146f96e Author: Jia Tan Date: 2022-06-12 11:31:40 +0800 Created tests for all functions exported in vli.h Achieved 100% code coverage vli_encoder.c, vli_decoder.c, and vli_size.c tests/Makefile.am | 4 +- tests/test_vli.c | 308 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 311 insertions(+), 1 deletion(-) commit 1e3eb61815a91c0a1bfbb802e2d95593f523491f Author: jiat75 Date: 2022-06-03 21:24:54 +0800 Added parallel test artifacts to .gitignore .gitignore | 2 ++ 1 file changed, 2 insertions(+) commit 00e3613f1212eaa84e721585fdb4de6967cf2476 Author: Lasse Collin Date: 2022-06-14 21:29:21 +0300 Tests: Use good-1-empty-bcj-lzma2.xz in test_bcj_exact_size. It's much nicer this way so that the test data isn't a hardcoded table inside the C file. tests/test_bcj_exact_size.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) commit 86bab755be252bfd3e0a9aee8e7b83a9bbb23ed0 Author: Lasse Collin Date: 2022-06-14 21:26:13 +0300 Tests: Add file reading helpers to tuktest.h. tests/tuktest.h | 183 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 176 insertions(+), 7 deletions(-) commit 83d2337b72dbf391c6f3b41889eea99e51679105 Author: Lasse Collin Date: 2022-06-14 18:21:57 +0300 Tests: tuktest.h: Move a printf from a macro to a helper function. 
tests/tuktest.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) commit f9e8176ea7d520797a2db2d49a5a632c285674a8 Author: Lasse Collin Date: 2022-06-14 17:20:49 +0300 Tests: Add test file good-1-empty-bcj-lzma2.xz. This is from test_bcj_exact_size.c. It's good to have it as a standalone file. tests/files/README | 5 +++++ tests/files/good-1-empty-bcj-lzma2.xz | Bin 0 -> 52 bytes 2 files changed, 5 insertions(+) commit aa75c5563a760aea3aa23d997d519e702e82726b Author: Jia Tan Date: 2022-06-10 21:35:18 +0800 Tests: Created tests for hardware functions. Created tests for all API functions exported in src/liblzma/api/lzma/hardware.h. The tests are fairly trivial but are helpful because they will inform users if their machines cannot support these functions. They also improve the code coverage metrics. .gitignore | 1 + tests/Makefile.am | 2 ++ tests/test_hardware.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) commit 5c8ffdca20813939818843476fb212dfae8838a3 Author: Lasse Collin Date: 2022-06-02 21:01:45 +0300 Tests: Convert test_check to tuktest. Thanks to Jia Tan for help with all the tests. tests/test_check.c | 67 +++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 34 deletions(-) commit faf5ff8899d539b4dcd2a7e5280cb820a4746c86 Author: Lasse Collin Date: 2022-06-02 20:31:03 +0300 Tests: Convert test_block_header to tuktest. tests/test_block_header.c | 89 +++++++++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 37 deletions(-) commit 754d39fbebee3782258d42f154a223d3c5770ec7 Author: Lasse Collin Date: 2022-06-02 20:28:23 +0300 Tests: Convert test_bcj_exact_size to tuktest. The compress() and decompress() functions were merged because the latter depends on the former so they need to be a single test case.
tests/test_bcj_exact_size.c | 75 +++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 34 deletions(-) commit 96da21470f9570cd08286906a050a7c22631775b Author: Lasse Collin Date: 2022-06-02 20:27:00 +0300 Tests: Include tuktest.h in tests.h. This breaks -Werror because none of the tests so far use tuktest.h and thus there are warnings about unused variables and functions. tests/tests.h | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) commit df71ba1c991f60c3269aaadd398247e632714626 Author: Lasse Collin Date: 2022-06-02 20:25:21 +0300 Tests: Add tuktest.h mini-test-framework. tests/Makefile.am | 1 + tests/tuktest.h | 752 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 753 insertions(+) commit 4773608554d1b684a05ff9c1d879cf5c42266d33 Author: Lasse Collin Date: 2022-05-23 21:31:36 +0300 Build: Enable Automake's parallel test harness. It has been the default for quite some time already and the old serial harness is discouraged. The downside is that with parallel tests one cannot print progress info or other diagnostics to the terminal; all output from the tests will be in the log files only. But now that the compression tests are separated the parallel tests will speed things up. configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 9a6dd6d46f7f256a5573e5d874c1052033ed7b05 Author: Lasse Collin Date: 2022-05-23 21:17:47 +0300 Tests: Split test_compress.sh into separate test unit for each file. test_compress.sh now takes one command line argument: a filename to be tested. If it begins with "compress_generated_" the file will be created with create_compress_files. This will allow parallel execution of the slow tests.
tests/Makefile.am | 11 +++- tests/test_compress.sh | 91 +++++++++++++++++++--------------- tests/test_compress_generated_abc | 3 ++ tests/test_compress_generated_random | 3 ++ tests/test_compress_generated_text | 3 ++ tests/test_compress_prepared_bcj_sparc | 3 ++ tests/test_compress_prepared_bcj_x86 | 3 ++ 7 files changed, 77 insertions(+), 40 deletions(-) commit c7758ac9c734707514dd34f254173ebac5eea7f8 Author: Lasse Collin Date: 2022-05-23 20:32:49 +0300 Test: Make create_compress_files.c a little more flexible. If a command line argument is given, then only the test file of that type is created. It's quite dumb in sense that unknown names don't give an error but it's good enough here. Also use EXIT_FAILURE instead of 1 as exit status for errors. tests/create_compress_files.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) commit 4a8e4a7b0ad4b03c0ac6862716c3457452cdaf8c Author: Lasse Collin Date: 2022-05-23 20:17:42 +0300 Tests: Remove unneeded commented lines from test_compress.sh. tests/test_compress.sh | 13 ------------- 1 file changed, 13 deletions(-) commit 2ee50d150ee009f36135540b459e6ff328549725 Author: Lasse Collin Date: 2022-05-23 20:16:00 +0300 Tests: Remove progress indicator from test_compress.sh. It will be useless with Automake's parallel tests. tests/test_compress.sh | 9 --------- 1 file changed, 9 deletions(-) commit 2ce4f36f179a81d0c6e182a409f363df759d1ad0 Author: Lasse Collin Date: 2022-05-23 19:37:18 +0300 liblzma: Silence a warning. The actual initialization is done via mythread_sync and seems that GCC doesn't necessarily see that it gets initialized there. src/liblzma/common/stream_decoder_mt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 5d8f3764ef43c35910e6d7003c0900a961ef6544 Author: Lasse Collin Date: 2022-04-14 20:53:16 +0300 xz: Fix build with --disable-threads. 
src/xz/hardware.c | 4 ++++ 1 file changed, 4 insertions(+) commit 1d592897278b172d8549aa29c3a1f3a4f432a9b9 Author: Lasse Collin Date: 2022-04-14 14:50:17 +0300 xz: Change the cap of the default -T0 memlimit for 32-bit xz. The SIZE_MAX / 3 was 1365 MiB. 1400 MiB gives little more room and it looks like a round (artificial) number in --info-memory once --info-memory is made to display it. Also, using #if avoids useless code on 64-bit builds. src/xz/hardware.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) commit c77fe55ddb7752ed0fec46967c5ec9a72632ea0c Author: Lasse Collin Date: 2022-04-14 14:20:46 +0300 xz: Add a default soft memory usage limit for --threads=0. This is a soft limit in sense that it only affects the number of threads. It never makes xz fail and it never makes xz change settings that would affect the compressed output. The idea is to make -T0 have more reasonable behavior when the system has very many cores or when memory-hungry compression options are used. This also helps with 32-bit xz, preventing it from running out of address space. The downside of this commit is that now the number of threads might become too low compared to what the user expected. I hope this to be an acceptable compromise as the old behavior has been a source of well-argued complaints for a long time. src/xz/coder.c | 28 ++++++++++++++++++++++++++-- src/xz/hardware.c | 38 +++++++++++++++++++++++++++++--------- src/xz/hardware.h | 27 +++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 11 deletions(-) commit 0adc13bfe32c14f3e4c6ce9f2d4fdf4112ab53f4 Author: Lasse Collin Date: 2022-04-14 12:59:09 +0300 xz: Make -T0 use multithreaded mode on single-core systems. The main problem with the old behavior is that the compressed output is different on single-core systems vs. multicore systems. This commit fixes it by making -T0 one thread in multithreaded mode on single-core systems. The downside of this is that it uses more memory.
However, if --memlimit-compress is used, xz can (thanks to the previous commit) drop to the single-threaded mode still. src/xz/coder.c | 18 +++++++++--------- src/xz/hardware.c | 14 ++++++++++++++ src/xz/hardware.h | 4 ++++ 3 files changed, 27 insertions(+), 9 deletions(-) commit 898faa97287a756231c663a3ed5165672b417207 Author: Lasse Collin Date: 2022-04-14 12:38:00 +0300 xz: Changes to --memlimit-compress and --no-adjust. In single-threaded mode, --memlimit-compress can make xz scale down the LZMA2 dictionary size to meet the memory usage limit. This obviously affects the compressed output. However, if xz was in threaded mode, --memlimit-compress could make xz reduce the number of threads but it wouldn't make xz switch from multithreaded mode to single-threaded mode or scale down the LZMA2 dictionary size. This seemed illogical and there was even a "FIXME?" about it. Now --memlimit-compress can make xz switch to single-threaded mode if one thread in multithreaded mode uses too much memory. If memory usage is still too high, then the LZMA2 dictionary size can be scaled down too. The option --no-adjust was also changed so that it no longer prevents xz from scaling down the number of threads as that doesn't affect compressed output (only performance). After this commit --no-adjust only prevents adjustments that affect compressed output, that is, with --no-adjust xz won't switch from multithreaded mode to single-threaded mode and won't scale down the LZMA2 dictionary size. The man page wasn't updated yet. src/xz/coder.c | 63 +++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 20 deletions(-) commit cad299008cf73ec566f0662a9cf2b94f86a99659 Author: Lasse Collin Date: 2022-04-11 22:20:49 +0300 xz: Add --memlimit-mt-decompress along with a default limit value. --memlimit-mt-decompress allows specifying the limit for multithreaded decompression. This matches memlimit_threading in liblzma. 
This limit can only affect the number of threads being used; it will never prevent xz from decompressing a file. The old --memlimit-decompress option is still used at the same time. If the value of --memlimit-decompress (the default value or one specified by the user) is less than the value of --memlimit-mt-decompress , then --memlimit-mt-decompress is reduced to match --memlimit-decompress. Man page wasn't updated yet. src/xz/args.c | 24 +++++++++++++++------- src/xz/coder.c | 34 ++++++++++--------------------- src/xz/hardware.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++----- src/xz/hardware.h | 17 ++++++++++------ src/xz/message.c | 4 +++- 5 files changed, 97 insertions(+), 42 deletions(-) commit fe87b4cd5364f5bbb6a75a0299f1500c852d7c9a Author: Lasse Collin Date: 2022-04-06 23:11:59 +0300 liblzma: Threaded decoder: Improve setting of pending_error. It doesn't need to be done conditionally. The comments try to explain it. src/liblzma/common/stream_decoder_mt.c | 51 +++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 16 deletions(-) commit 90621da7f6e1bfd6d91d60415eae04b2bca274c2 Author: Lasse Collin Date: 2022-04-06 12:39:49 +0300 liblzma: Add a new flag LZMA_FAIL_FAST for threaded decoder. In most cases if the input file is corrupt the application won't care about the uncompressed content at all. With this new flag the threaded decoder will return an error as soon as any thread has detected an error; it won't wait to copy out the data before the location of the error. I don't plan to use this in xz to keep the behavior consistent between single-threaded and multi-threaded modes. 
src/liblzma/api/lzma/container.h | 25 ++++++++++++++++- src/liblzma/common/common.h | 7 +++-- src/liblzma/common/stream_decoder_mt.c | 50 +++++++++++++++++++--------------- 3 files changed, 56 insertions(+), 26 deletions(-) commit 64b6d496dc815a176d8307f418f6834a26783484 Author: Lasse Collin Date: 2022-04-05 12:24:57 +0300 liblzma: Threaded decoder: Always wait for output if LZMA_FINISH is used. This makes the behavior consistent with the single-threaded decoder when handling truncated .xz files. Thanks to Jia Tan for finding this issue. src/liblzma/common/stream_decoder_mt.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) commit e671bc8828b9c0c5406c3a22c541301d0eb54518 Author: Lasse Collin Date: 2022-04-02 21:49:59 +0300 liblzma: Threaded decoder: Support zpipe.c-style decoding loop. This makes it possible to call lzma_code() in a loop that only reads new input when lzma_code() didn't fill the output buffer completely. That isn't the calling style suggested by the liblzma example program 02_decompress.c so perhaps the usefulness of this feature is limited. Also, it is possible to write such a loop so that it works with the single-threaded decoder but not with the threaded decoder even after this commit, or so that it works only if lzma_mt.timeout = 0. The zlib tutorial is a well-known example of a loop where more input is read only when output isn't full. Porting this as is to liblzma would work with the single-threaded decoder (if LZMA_CONCATENATED isn't used) but it wouldn't work with threaded decoder even after this commit because the loop assumes that no more output is possible when it cannot read more input ("if (strm.avail_in == 0) break;"). This cannot be fixed at liblzma side; the loop has to be modified at least a little. I'm adding this in any case because the actual code is simple and short and should have no harmful side-effects in other situations. 
src/liblzma/common/stream_decoder_mt.c | 77 +++++++++++++++++++++++++++++----- 1 file changed, 67 insertions(+), 10 deletions(-) commit 2ba8173e27be4793edb46497e499ac2ae753a316 Author: Lasse Collin Date: 2022-03-31 00:05:07 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 69d1b3fc29677af8ade8dc15dba83f0589cb63d6 Author: Lasse Collin Date: 2022-03-29 19:19:12 +0300 xzgrep: Fix escaping of malicious filenames (ZDI-CAN-16587). Malicious filenames can make xzgrep to write to arbitrary files or (with a GNU sed extension) lead to arbitrary code execution. xzgrep from XZ Utils versions up to and including 5.2.5 are affected. 5.3.1alpha and 5.3.2alpha are affected as well. This patch works for all of them. This bug was inherited from gzip's zgrep. gzip 1.12 includes a fix for zgrep. The issue with the old sed script is that with multiple newlines, the N-command will read the second line of input, then the s-commands will be skipped because it's not the end of the file yet, then a new sed cycle starts and the pattern space is printed and emptied. So only the last line or two get escaped. One way to fix this would be to read all lines into the pattern space first. However, the included fix is even simpler: All lines except the last line get a backslash appended at the end. To ensure that shell command substitution doesn't eat a possible trailing newline, a colon is appended to the filename before escaping. The colon is later used to separate the filename from the grep output so it is fine to add it here instead of a few lines later. The old code also wasn't POSIX compliant as it used \n in the replacement section of the s-command. Using \ is the POSIX compatible method. LC_ALL=C was added to the two critical sed commands. POSIX sed manual recommends it when using sed to manipulate pathnames because in other locales invalid multibyte sequences might cause issues with some sed implementations. 
In case of GNU sed, these particular sed scripts wouldn't have such problems but some other scripts could have, see: info '(sed)Locale Considerations' This vulnerability was discovered by: cleemy desu wayo working with Trend Micro Zero Day Initiative Thanks to Jim Meyering and Paul Eggert discussing the different ways to fix this and for coordinating the patch release schedule with gzip. src/scripts/xzgrep.in | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) commit bd93b776c1bd15e90661033c918cdeb354dbcc38 Author: Lasse Collin Date: 2022-03-26 01:02:44 +0200 liblzma: Fix a deadlock in threaded decoder. If a worker thread has consumed all input so far and it's waiting on thr->cond and then the main thread enables partial update for that thread, the code used to deadlock. This commit allows one dummy decoding pass to occur in this situation which then also does the partial update. As part of the fix, this moves thr->progress_* updates to avoid the second thr->mutex locking. Thanks to Jia Tan for finding, debugging, and reporting the bug. src/liblzma/common/stream_decoder_mt.c | 71 +++++++++++++++++++++++++--------- 1 file changed, 52 insertions(+), 19 deletions(-) commit e0394e94230f208682ac1e1f4c41f22f9ad79916 Author: Lasse Collin Date: 2022-03-23 16:34:00 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 487c77d48760564b1949c5067630b675b87be4de Author: Lasse Collin Date: 2022-03-23 16:28:55 +0200 liblzma: Threaded decoder: Don't stop threads on LZMA_TIMED_OUT. LZMA_TIMED_OUT is not an error and thus stopping threads on LZMA_TIMED_OUT breaks the decoder badly. Thanks to Jia Tan for finding the bug and for the patch. src/liblzma/common/stream_decoder_mt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 6c6da57ae2aa962aabde6892442227063d87e88c Author: Lasse Collin Date: 2022-03-07 00:36:16 +0200 xz: Add initial support for threaded decompression. 
If threading support is enabled at build time, this will use lzma_stream_decoder_mt() even for single-threaded mode. With memlimit_threading=0 the behavior should be identical. This needs some work like adding --memlimit-threading=LIMIT. The original patch from Sebastian Andrzej Siewior included a method to get currently available RAM on Linux. It might be one way to go but as it is Linux-only, the available-RAM approach needs work for portability or using a fallback method on other OSes. The man page wasn't updated yet. src/xz/coder.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) commit 4cce3e27f529af33e0e7749a8cbcec59954946b5 Author: Lasse Collin Date: 2022-03-06 23:36:20 +0200 liblzma: Add threaded .xz decompressor. I realize that this is about a decade late. Big thanks to Sebastian Andrzej Siewior for the original patch. I made a bunch of smaller changes but after a while quite a few things got rewritten. So any bugs in the commit were created by me. src/liblzma/api/lzma/container.h | 90 +- src/liblzma/common/Makefile.inc | 5 + src/liblzma/common/common.h | 4 + src/liblzma/common/stream_decoder_mt.c | 1814 ++++++++++++++++++++++++++++++++ src/liblzma/liblzma.map | 1 + 5 files changed, 1907 insertions(+), 7 deletions(-) commit 717631b9788dc9c100ee0c87d3c14a2782638ff4 Author: Lasse Collin Date: 2022-03-06 16:54:23 +0200 liblzma: Fix docs: lzma_block_decoder() cannot return LZMA_UNSUPPORTED_CHECK. If Check is unsupported, it will be silently ignored. It's the caller's job to handle it. src/liblzma/api/lzma/block.h | 3 --- 1 file changed, 3 deletions(-) commit 1a4bb97a00936535e30ac61945aeee38882b5d1a Author: Lasse Collin Date: 2022-03-06 16:41:19 +0200 liblzma: Add new output queue (lzma_outq) features. Add lzma_outq_clear_cache2() which may leave one buffer allocated in the cache. Add lzma_outq_outbuf_memusage() to get the memory needed for a single lzma_outbuf. This is now used internally in outqueue.c too. 
Track both the total amount of memory allocated and the amount of memory that is in active use (not in cache). In lzma_outbuf, allow storing the current input position that matches the current output position. This way the main thread can notice when no more output is possible without first providing more input. Allow specifying return code for lzma_outq_read() in a finished lzma_outbuf. src/liblzma/common/outqueue.c | 43 +++++++++++++++++++++++++++++++++++------- src/liblzma/common/outqueue.h | 44 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 79 insertions(+), 8 deletions(-) commit ddbc6f58c2de388eed24cd7ea91b523d397da5f4 Author: Lasse Collin Date: 2022-03-06 15:18:58 +0200 liblzma: Index hash: Change return value type of hash_append() to void. src/liblzma/common/index_hash.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) commit 20e7a33e2d59c6a814447d3991f21e2702174b20 Author: Lasse Collin Date: 2022-02-22 03:42:57 +0200 liblzma: Minor addition to lzma_vli_size() API doc. Thanks to Jia Tan. src/liblzma/api/lzma/vli.h | 2 ++ 1 file changed, 2 insertions(+) commit 4f78f5fcf63592f2d77e921cfe0d5de300867374 Author: Lasse Collin Date: 2022-02-22 02:04:18 +0200 liblzma: Check the return value of lzma_index_append() in threaded encoder. If lzma_index_append() failed (most likely memory allocation failure) it could have gone unnoticed and the resulting .xz file would have an incorrect Index. Decompressing such a file would produce the correct uncompressed data but then an error would occur when verifying the Index field. src/liblzma/common/stream_encoder_mt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) commit 5313ad66b40aab822ddca3e9905254cb99a4080d Author: Lasse Collin Date: 2022-02-22 01:37:39 +0200 Update THANKS. 
THANKS | 1 + 1 file changed, 1 insertion(+) commit 865e0a3689a25a7ee8eecae1a34c1775e3aa676e Author: Ed Maste Date: 2022-02-11 15:25:46 +0000 liblzma: Use non-executable stack on FreeBSD as on Linux src/liblzma/check/crc32_x86.S | 4 ++-- src/liblzma/check/crc64_x86.S | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) commit 1c9a5786d206b4abc8e427326651c8174baea753 Author: Lasse Collin Date: 2022-02-20 20:36:27 +0200 liblzma: Make Block decoder catch certain types of errors better. Now it limits the input and output buffer sizes that are passed to a raw decoder. This way there's no need to check if the sizes can grow too big or overflow when updating Compressed Size and Uncompressed Size counts. This also means that a corrupt file cannot cause the raw decoder to process useless extra input or output that would exceed the size info in Block Header (and thus cause LZMA_DATA_ERROR anyway). More importantly, now the size information is verified more carefully in case raw decoder returns LZMA_OK. This doesn't really matter with the current single-threaded .xz decoder as the errors would be detected slightly later anyway. But this helps avoiding corner cases in the upcoming threaded decompressor, and it might help other Block decoder uses outside liblzma too. The test files bad-1-lzma2-{9,10,11}.xz test these conditions. With the single-threaded .xz decoder the only difference is that LZMA_DATA_ERROR is detected in a different place now. src/liblzma/common/block_decoder.c | 79 ++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 25 deletions(-) commit 555de11873eb00c9b94a8be70645db502e5a9dbd Author: Lasse Collin Date: 2022-02-20 19:38:55 +0200 Tests: Add bad-1-lzma2-11.xz.
tests/files/README | 5 +++++ tests/files/bad-1-lzma2-11.xz | Bin 0 -> 64 bytes 2 files changed, 5 insertions(+) commit f0da507f22e7f4e3edb75b45b74d344244ca03fb Author: Lasse Collin Date: 2022-02-18 18:51:10 +0200 Translations: Fix po4a failure with the French man page translations. Thanks to Mario Blättermann for the patch. po4a/fr_FR.po | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) commit f7711d228c3c32395460c82498c60a9f730d0239 Author: Lasse Collin Date: 2022-02-07 01:14:37 +0200 Translations: Add French translation of man pages. This matches xz-utils 5.2.5-2 in Debian. The translation was done by "bubu", proofread by the debian-l10n-french mailing list contributors, and submitted to me on the xz-devel mailing list by Jean-Pierre Giraud. Thanks to everyone! po4a/fr_FR.po | 3541 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ po4a/po4a.conf | 2 +- 2 files changed, 3542 insertions(+), 1 deletion(-) commit 6468f7e41a8e9c611e4ba8d34e2175c5dacdbeb4 Author: jiat75 Date: 2022-01-28 20:47:55 +0800 liblzma: Add NULL checks to LZMA and LZMA2 properties encoders. Previously lzma_lzma_props_encode() and lzma_lzma2_props_encode() assumed that the options pointers must be non-NULL because with these filters the API says it must never be NULL. It is good to do these checks anyway. src/liblzma/lzma/lzma2_encoder.c | 3 +++ src/liblzma/lzma/lzma_encoder.c | 3 +++ 2 files changed, 6 insertions(+) commit 2523c30705f49eabd27b854aa656ae87cc224808 Author: Lasse Collin Date: 2022-02-06 23:19:32 +0200 liblzma: Fix uint64_t vs. size_t confusion. This broke 32-bit builds due to a pointer type mismatch. This bug was introduced with the output-size-limited encoding in 625f4c7c99b2fcc4db9e7ab2deb4884790e2e17c. Thanks to huangqinjin for the bug report.
src/liblzma/rangecoder/range_encoder.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) commit 2bd36c91d03e03b31a4f12fd0afc100ae32d66e2 Author: huangqinjin Date: 2021-12-13 20:49:21 +0800 CMake: Keep compatible with Windows 95 for 32-bit build. CMakeLists.txt | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) commit 2024fbf2794885277d05378d40b2b8015a7c3b40 Author: Lasse Collin Date: 2021-11-13 21:04:05 +0200 xzgrep: Update man page timestamp. src/scripts/xzgrep.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 400e7a239a53282cedaad927a41f3463d7f542e5 Author: Lasse Collin Date: 2021-11-13 18:23:24 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 3a512c7787b2642ca946f4adc6e9a0a5d9b0d5a0 Author: Ville Skyttä Date: 2021-11-13 10:11:57 +0200 xzgrep: use `grep -E/-F` instead of `egrep` and `fgrep` `egrep` and `fgrep` have been deprecated in GNU grep since 2007, and in current post 3.7 Git they have been made to emit obsolescence warnings: https://git.savannah.gnu.org/cgit/grep.git/commit/?id=a9515624709865d480e3142fd959bccd1c9372d1 src/scripts/xzgrep.1 | 8 ++++---- src/scripts/xzgrep.in | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) commit edf525e2b1840dcaf377df472c67d8f11f8ace1b Author: Lasse Collin Date: 2021-10-28 23:02:11 +0300 Bump the version number for 5.3.2alpha. src/liblzma/api/lzma/version.h | 2 +- src/liblzma/liblzma.map | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit ea8c948655a86290524efe59cff067e06a886709 Author: Lasse Collin Date: 2021-10-28 22:59:52 +0300 Add NEWS for 5.3.2alpha. NEWS | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) commit 52435f302f4724157ec50b4210cbe42b285c3cbc Author: Lasse Collin Date: 2021-10-27 23:27:48 +0300 Update THANKS. 
THANKS | 1 + 1 file changed, 1 insertion(+) commit f2aea1d5a504b2021bf47a238390e4f12bdd518d Author: Lasse Collin Date: 2021-10-27 23:23:11 +0300 xz: Change the coding style of the previous commit. It isn't any better now but it's consistent with the rest of the code base. src/xz/file_io.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) commit 892b16cc282f5b4e1c49871b326f4db25c5b4d81 Author: Alexander Bluhm Date: 2021-10-05 23:33:16 +0200 xz: Avoid fchown(2) failure. OpenBSD does not allow to change the group of a file if the user does not belong to this group. In contrast to Linux, OpenBSD also fails if the new group is the same as the old one. Do not call fchown(2) in this case, it would change nothing anyway. This fixes an issue with Perl Alien::Build module. https://github.com/PerlAlien/Alien-Build/issues/62 src/xz/file_io.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) commit 2b509c868cae3988bf21cd693fbf9021cdc85628 Author: Lasse Collin Date: 2021-09-17 17:31:11 +0300 liblzma: Fix liblzma.map for the lzma_microlzma_* symbols. This should have been part of d267d109c370a40b502e73f8664b154b15e4f253. Thanks to Gao Xiang. src/liblzma/liblzma.map | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit cacb06a954b58255dfc084a0bc9708f43a0fd6d6 Author: Lasse Collin Date: 2021-09-09 22:21:07 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 6928aac9da6ba612780b9f72ba1d6ecbe1e8b54e Author: Lasse Collin Date: 2021-09-09 21:41:51 +0300 liblzma: Use _MSVC_LANG to detect when "noexcept" can be used with MSVC. By default, MSVC always sets __cplusplus to 199711L. The real C++ standard version is available in _MSVC_LANG (or one could use /Zc:__cplusplus to set __cplusplus correctly). Fixes . Thanks to Dan Weiss. 
src/liblzma/api/lzma.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit d267d109c370a40b502e73f8664b154b15e4f253 Author: Lasse Collin Date: 2021-09-05 20:38:12 +0300 liblzma: Rename EROFS LZMA to MicroLZMA. It still exists primarily for EROFS but MicroLZMA is a more generic name (that hopefully doesn't clash with something that already exists). src/liblzma/api/lzma/container.h | 33 +++++++++++++--------- src/liblzma/common/Makefile.inc | 4 +-- .../{erofs_decoder.c => microlzma_decoder.c} | 32 ++++++++++----------- .../{erofs_encoder.c => microlzma_encoder.c} | 30 ++++++++++---------- 4 files changed, 52 insertions(+), 47 deletions(-) commit 3247e95115acb95bc27f41e8cf4501db5b0b4309 Author: Lasse Collin Date: 2021-06-04 19:02:38 +0300 xzdiff: Update the man page about the exit status. This was forgotten from 194029ffaf74282a81f0c299c07f73caca3232ca. src/scripts/xzdiff.1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 96f5a28a46fc93ac4e296808ac0f8631d05498bc Author: Lasse Collin Date: 2021-06-04 18:52:48 +0300 xzless: Fix less(1) version detection when it contains a dot. Sometimes the version number from "less -V" contains a dot, sometimes not. xzless failed to detect the version number when it does contain a dot. This fixes it. Thanks to nick87720z for reporting this. Apparently it had been reported here in 2013. src/scripts/xzless.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 5fb5212d816addbc523d0798cb482fdd0484f8fa Author: Lasse Collin Date: 2021-04-11 19:58:10 +0300 Update THANKS. THANKS | 2 ++ 1 file changed, 2 insertions(+) commit fc3d3a7296ef58bb799a73943636b8bfd95339f7 Author: Ivan A. Melnikov Date: 2021-04-09 11:45:10 +0300 Reduce maximum possible memory limit on MIPS32 Due to architectural limitations, address space available to a single userspace process on MIPS32 is limited to 2 GiB, not 4, even on systems that have more physical RAM -- e.g.
64-bit systems with 32-bit userspace, or systems that use XPA (an extension similar to x86's PAE). So, for MIPS32, we have to impose stronger memory limits. I've chosen 2000MiB to give the process some headroom. src/xz/hardware.c | 6 ++++++ 1 file changed, 6 insertions(+) commit e7da44d5151e21f153925781ad29334ae0786101 Author: Lasse Collin Date: 2021-02-13 23:31:27 +0200 CMake: Use interface library for better FindLibLZMA compatibility. https://www.mail-archive.com/xz-devel@tukaani.org/msg00446.html Thanks to Markus Rickert. CMakeLists.txt | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) commit a61dd82ada39030f41b4ffca9ea551714908bedc Author: Lasse Collin Date: 2021-01-30 18:36:04 +0200 CMake: Try to improve compatibility with the FindLibLZMA module. The naming conflict with FindLibLZMA module gets worse. Not avoiding it in the first place was stupid. Normally find_package(LibLZMA) will use the module and find_package(liblzma 5.2.5 REQUIRED CONFIG) will use the config file even with a case insensitive file system. However, if CMAKE_FIND_PACKAGE_PREFER_CONFIG is TRUE and the file system is case insensitive, find_package(LibLZMA) will find our liblzma config file instead of using FindLibLZMA module. One big problem with this is that FindLibLZMA uses LibLZMA::LibLZMA and we use liblzma::liblzma as the target name. With target names CMake happens to be case sensitive. To workaround this, this commit adds add_library(LibLZMA::LibLZMA ALIAS liblzma::liblzma) to the config file. Then both spellings work. To make the behavior consistent between case sensitive and insensitive file systems, the config and related files are renamed from liblzmaConfig.cmake to liblzma-config.cmake style. With this style CMake looks for lowercase version of the package name so find_package(LiBLzmA 5.2.5 REQUIRED CONFIG) will work to find our config file. 
There are other differences between our config file and FindLibLZMA so it's still possible that things break for reasons other than the spelling of the target name. Hopefully those situations aren't too common. When the config file is available, it should always give as good or better results as FindLibLZMA so this commit doesn't affect the recommendation to use find_package(liblzma 5.2.5 REQUIRED CONFIG) which explicitly avoids FindLibLZMA. Thanks to Markus Rickert. CMakeLists.txt | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) commit 5b7bc1b8ae766a76710ca1b99f909cf52c697f05 Author: Lasse Collin Date: 2021-01-29 21:19:43 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 6c6f0db340dcb8bb424411cedba713405d55f6b8 Author: Lasse Collin Date: 2021-01-29 21:19:08 +0200 liblzma: Fix uninitialized variable. This was introduced two weeks ago in the commit 625f4c7c99b2fcc4db9e7ab2deb4884790e2e17c. Thanks to Nathan Moinvaziri. src/liblzma/lzma/lzma_encoder.c | 1 + 1 file changed, 1 insertion(+) commit bb1d5c1fdd30550d4221ecd336e0be1206132a5c Author: Lasse Collin Date: 2021-01-24 22:32:41 +0200 Tests: Add bad-1-lzma2-10.xz and also modify -9.xz. tests/files/README | 11 +++++++++-- tests/files/bad-1-lzma2-10.xz | Bin 0 -> 60 bytes tests/files/bad-1-lzma2-9.xz | Bin 72 -> 72 bytes 3 files changed, 9 insertions(+), 2 deletions(-) commit 6b8abc84a5469792e0355d0bfc0784d41cfdfef7 Author: Lasse Collin Date: 2021-01-24 19:22:35 +0200 liblzma: Fix a wrong comment in stream_encoder_mt.c. src/liblzma/common/stream_encoder_mt.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) commit 939fc5ed654aac25fe0c8684b2df8dbeadb2de1e Author: Lasse Collin Date: 2021-01-24 18:51:51 +0200 Tests: Add bad-1-lzma2-9.xz.
tests/files/README | 4 ++++ tests/files/bad-1-lzma2-9.xz | Bin 0 -> 72 bytes 2 files changed, 4 insertions(+) commit fdd30032f8531ac89519b48c21d810ecf06825f6 Author: Lasse Collin Date: 2021-01-24 17:02:00 +0200 Tests: Add bad-1-check-crc32-2.xz. tests/files/README | 7 +++++++ tests/files/bad-1-check-crc32-2.xz | Bin 0 -> 72 bytes 2 files changed, 7 insertions(+) commit db465419ae26ec7fb9b9472183911ff521620c77 Author: Lasse Collin Date: 2021-01-17 19:20:50 +0200 liblzma: In EROFS LZMA decoder, verify that comp_size matches at the end. When the uncompressed size is known to be exact, after decompressing the stream exactly comp_size bytes of input must have been consumed. This is a minor improvement to error detection. src/liblzma/common/erofs_decoder.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) commit 774cc0118ba2496581cb2621505a04bb6598cc75 Author: Lasse Collin Date: 2021-01-17 18:53:34 +0200 liblzma: Make EROFS LZMA decoder work when exact uncomp_size isn't known. The caller must still not specify an uncompressed size bigger than the actual uncompressed size. As a downside, this now needs the exact compressed size. src/liblzma/api/lzma/container.h | 23 ++++++++--- src/liblzma/common/erofs_decoder.c | 80 ++++++++++++++++++++++++++++++++++---- 2 files changed, 91 insertions(+), 12 deletions(-) commit 421b0aa352da244075db10205cf33712f91b9835 Author: Lasse Collin Date: 2021-01-14 20:57:11 +0200 liblzma: Fix missing normalization in rc_encode_dummy(). Without this fix it could attempt to create too much output. src/liblzma/rangecoder/range_encoder.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) commit 601ec0311e769fc704daaaa7dac0ca840aff080e Author: Lasse Collin Date: 2021-01-14 20:07:01 +0200 liblzma: Add EROFS LZMA encoder and decoder. Right now this is just a planned extra-compact format for use in the EROFS file system in Linux. At this point it's possible that the format will either change or be abandoned and removed completely. 
The special thing about the encoder is that it uses the output-size-limited encoding added in the previous commit. EROFS uses fixed-sized blocks (e.g. 4 KiB) to hold compressed data so the compressors must be able to create valid streams that fill the given block size. src/liblzma/api/lzma/container.h | 76 +++++++++++++++++++ src/liblzma/common/Makefile.inc | 2 + src/liblzma/common/erofs_decoder.c | 148 +++++++++++++++++++++++++++++++++++++ src/liblzma/common/erofs_encoder.c | 139 ++++++++++++++++++++++++++++++++++ src/liblzma/liblzma.map | 2 + 5 files changed, 367 insertions(+) commit 625f4c7c99b2fcc4db9e7ab2deb4884790e2e17c Author: Lasse Collin Date: 2021-01-13 19:16:32 +0200 liblzma: Add rough support for output-size-limited encoding in LZMA1. With this it is possible to encode LZMA1 data without EOPM so that the encoder will encode as much input as it can without exceeding the specified output size limit. The resulting LZMA1 stream will be a normal LZMA1 stream without EOPM. The actual uncompressed size will be available to the caller via the uncomp_size pointer. One missing thing is that the LZMA layer doesn't inform the LZ layer when the encoding is finished and thus the LZ may read more input when it won't be used. However, this doesn't matter if encoding is done with a single call (which is the planned use case for now). For proper multi-call encoding this should be improved. This commit only adds the functionality for internal use. Nothing uses it yet. src/liblzma/common/common.h | 11 +++ src/liblzma/lz/lz_encoder.c | 16 ++++ src/liblzma/lz/lz_encoder.h | 4 + src/liblzma/lzma/lzma_encoder.c | 127 +++++++++++++++++++++++--------- src/liblzma/lzma/lzma_encoder_private.h | 12 +++ src/liblzma/rangecoder/range_encoder.h | 111 ++++++++++++++++++++++++++++ 6 files changed, 246 insertions(+), 35 deletions(-) commit 9cdabbeea891e8f1e7741b076f7db6ac05ae392a Author: Lasse Collin Date: 2021-01-11 23:57:11 +0200 Scripts: Add zstd support to xzdiff. 
src/scripts/xzdiff.1 | 6 ++++-- src/scripts/xzdiff.in | 16 +++++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) commit d9ec3add97cf4c999a7f594c6529680227b6c274 Author: Lasse Collin Date: 2021-01-11 23:41:30 +0200 Update THANKS. THANKS | 2 ++ 1 file changed, 2 insertions(+) commit 074259f4f3966aeac6edb205fecbc1a8d2b58bb2 Author: Lasse Collin Date: 2021-01-11 23:41:16 +0200 xz: Make --keep accept symlinks, hardlinks, and setuid/setgid/sticky. Previously this required using --force but that has other effects too which might be undesirable. Changing the behavior of --keep has a small risk of breaking existing scripts but since this is a fairly special corner case I expect the likehood of breakage to be low enough. I think the new behavior is more logical. The only reason for the old behavior was to be consistent with gzip and bzip2. Thanks to Vincent Lefevre and Sebastian Andrzej Siewior. src/xz/file_io.c | 9 +++++---- src/xz/xz.1 | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 5 deletions(-) commit 73c555b3077c19dda29b6f4592ced2af876f8333 Author: Lasse Collin Date: 2021-01-11 23:28:52 +0200 Scripts: Fix exit status of xzgrep. Omit the -q option from xz, gzip, and bzip2. With xz this shouldn't matter. With gzip it's important because -q makes gzip replace SIGPIPE with exit status 2. With bzip2 it's important because with -q bzip2 is completely silent if input is corrupt while other decompressors still give an error message. Avoiding exit status 2 from gzip is important because bzip2 uses exit status 2 to indicate corrupt input. Before this commit xzgrep didn't recognize corrupt .bz2 files because xzgrep was treating exit status 2 as SIGPIPE for gzip compatibility. zstd still needs -q because otherwise it is noisy in normal operation. The code to detect real SIGPIPE didn't check if the exit status was due to a signal (>= 128) and so could ignore some other exit status too. 
src/scripts/xzgrep.in | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) commit 194029ffaf74282a81f0c299c07f73caca3232ca Author: Lasse Collin Date: 2021-01-11 22:01:51 +0200 Scripts: Fix exit status of xzdiff/xzcmp. This is a minor fix since this affects only the situation when the files differ and the exit status is something else than 0. In such case there could be SIGPIPE from a decompression tool and that would result in exit status of 2 from xzdiff/xzcmp while the correct behavior would be to return 1 or whatever else diff or cmp may have returned. This commit omits the -q option from xz/gzip/bzip2/lzop arguments. I'm not sure why the -q was used in the first place, perhaps it hides warnings in some situation that I cannot see at the moment. Hopefully the removal won't introduce a new bug. With gzip the -q option was harmful because it made gzip return 2 instead of >= 128 with SIGPIPE. Ignoring exit status 2 (warning from gzip) isn't practical because bzip2 uses exit status 2 to indicate corrupt input file. It's better if SIGPIPE results in exit status >= 128. With bzip2 the removal of -q seems to be good because with -q it prints nothing if input is corrupt. The other tools aren't silent in this situation even with -q. On the other hand, if zstd support is added, it will need -q since otherwise it's noisy in normal situations. Thanks to Étienne Mollier and Sebastian Andrzej Siewior. src/scripts/xzdiff.in | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) commit f7fa309e1f7178d04c7bedc03b73077639371e97 Author: Lasse Collin Date: 2021-01-09 21:14:36 +0200 liblzma: Make lzma_outq usable for threaded decompression too. Before this commit all output queue buffers were allocated as a single big allocation. Now each buffer is allocated separately when needed. Used buffers are cached to avoid reallocation overhead but the cache will keep only one buffer size at a time. 
This should make things work OK in the decompression where most of the time the buffer sizes will be the same but with some less common files the buffer sizes may vary. While this should work fine, it's still a bit preliminary and may even get reverted if it turns out to be useless for decompression. src/liblzma/common/outqueue.c | 268 +++++++++++++++++++++------------ src/liblzma/common/outqueue.h | 138 ++++++++++++----- src/liblzma/common/stream_encoder_mt.c | 52 ++++--- 3 files changed, 301 insertions(+), 157 deletions(-) commit a35a69d693ce37d4ba7c1855bda7d9cfa13d1778 Author: Lasse Collin Date: 2020-12-23 17:15:49 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 4fd79b90c52396d70e0b1206ceb1a873a0ad2589 Author: H.J. Lu Date: 2020-12-23 06:49:04 -0800 liblzma: Enable Intel CET in x86 CRC assembly codes When Intel CET is enabled, we need to include in assembly codes to mark Intel CET support and add _CET_ENDBR to indirect jump targets. Tested on Intel Tiger Lake under CET enabled Linux. src/liblzma/check/crc32_x86.S | 9 +++++++++ src/liblzma/check/crc64_x86.S | 9 +++++++++ 2 files changed, 18 insertions(+) commit bb3b8c6a23e25db79f862b1de325c56052e0354b Author: Lasse Collin Date: 2020-12-16 18:33:29 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 21588ca34af98738954fc12ded1b89d7294ef646 Author: Lasse Collin Date: 2020-12-16 18:30:14 +0200 Build: Don't build bundles on Apple OSes. Thanks to Daniel Packard. CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) commit d05b0c42dd8b38d8c6b8193c8af50e9bd3d16f28 Author: Lasse Collin Date: 2020-12-05 22:44:03 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 1890351f3423627ba5c4c495402f32d7e9ed90b7 Author: Adam Borowski Date: 2020-09-25 03:35:18 +0200 Scripts: Add zstd support to xzgrep. Thanks to Adam Borowski. 
src/scripts/xzgrep.1 | 9 ++++++--- src/scripts/xzgrep.in | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) commit 2f108abb3d82e4e2313b438dae9c0c7c7a6366f2 Author: Lasse Collin Date: 2020-11-17 20:51:48 +0200 CMake: Fix compatibility with CMake 3.13. The syntax "if(DEFINED CACHE{FOO})" requires CMake 3.14. In some other places the code treats the cache variables like normal variables already (${FOO} or if(FOO) is used, not ${CACHE{FOO}). Thanks to ygrek for reporting the bug on IRC. CMakeLists.txt | 2 +- cmake/tuklib_cpucores.cmake | 4 ++-- cmake/tuklib_physmem.cmake | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) commit 5af726a79273fafa5de5745b117e567f21c90e49 Author: Lasse Collin Date: 2020-11-01 22:56:43 +0200 Update THANKS. THANKS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) commit 4575d9d365c756ec189899f9f743e0b3515ce72d Author: Lasse Collin Date: 2020-11-01 22:34:25 +0200 xz: Avoid unneeded \f escapes on the man page. I don't want to use \c in macro arguments but groff_man(7) suggests that \f has better portability. \f would be needed for the .TP strings for portability reasons anyway. Thanks to Bjarni Ingi Gislason. src/xz/xz.1 | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) commit 620b32f5339f86710cb4435e01ecdac972ccac73 Author: Lasse Collin Date: 2020-11-01 19:09:53 +0200 xz: Use non-breaking spaces when intentionally using more than one space. This silences some style checker warnings. Seems that spaces in the beginning of a line don't need this treatment. Thanks to Bjarni Ingi Gislason. src/xz/xz.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit cb1f34988c8a4130485091b2f8b641303d8f701b Author: Lasse Collin Date: 2020-11-01 18:49:37 +0200 xz: Protect the ellipsis (...) on the man page with \&. This does it only when ... appears outside macro calls. Thanks to Bjarni Ingi Gislason. 
src/xz/xz.1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 5d224da3da87400f2fab313abbd7c710e7169ef9 Author: Lasse Collin Date: 2020-11-01 18:41:21 +0200 xz: Avoid the abbreviation "e.g." on the man page. A few are simply omitted, most are converted to "for example" and surrounded with commas. Sounds like that this is better style, for example, man-pages(7) recommends avoiding such abbreviations except in parenthesis. Thanks to Bjarni Ingi Gislason. src/xz/xz.1 | 66 ++++++++++++++++++++++++++++++------------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) commit 90457dbe3e5717660f5b81f8c604860fc5137c0c Author: Lasse Collin Date: 2020-07-12 23:10:03 +0300 xz man page: Change \- (minus) to \(en (en-dash) for a numeric range. Docs of ancient troff/nroff mention \(em (em-dash) but not \(en and \- was used for both minus and en-dash. I don't know how portable \(en is nowadays but it can be changed back if someone complains. At least GNU groff and OpenBSD's mandoc support it. Thanks to Bjarni Ingi Gislason for the patch. src/xz/xz.1 | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) commit 352ba2d69af2136bc814aa1df1a132559d445616 Author: Lasse Collin Date: 2020-07-12 20:46:24 +0300 Windows: Fix building of resource files when config.h isn't used. Now CMake + Visual Studio works for building liblzma.dll. Thanks to Markus Rickert. src/common/common_w32res.rc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) commit a9e2a87f1d61dcf684d809bf08c8ebea93f8a480 Author: Lasse Collin Date: 2020-04-06 19:31:50 +0300 src/scripts/xzgrep.1: Filenames to xzgrep are optional. xzgrep --help was correct already. 
src/scripts/xzgrep.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit a7ba275d9b855d186abb29eb7a4f4cb6d9ca6fe0 Author: Bjarni Ingi Gislason Date: 2020-03-26 22:17:31 +0000 src/script/xzgrep.1: Remove superfluous '.RB' Output is from: test-groff -b -e -mandoc -T utf8 -rF0 -t -w w -z [ "test-groff" is a developmental version of "groff" ] Input file is ./src/scripts/xzgrep.1 :20 (macro RB): only 1 argument, but more are expected :23 (macro RB): only 1 argument, but more are expected :26 (macro RB): only 1 argument, but more are expected :29 (macro RB): only 1 argument, but more are expected :32 (macro RB): only 1 argument, but more are expected "abc..." does not mean the same as "abc ...". The output from nroff and troff is unchanged except for the space between "file" and "...". Signed-off-by: Bjarni Ingi Gislason src/scripts/xzgrep.1 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) commit 133d498db0f4b14f066d192d64dbcade45deae6b Author: Bjarni Ingi Gislason Date: 2020-03-30 21:56:36 +0000 xzgrep.1: Delete superfluous '.PP' Summary: mandoc -T lint xzgrep.1 : mandoc: xzgrep.1:79:2: WARNING: skipping paragraph macro: PP empty There is no change in the output of "nroff" and "troff". Signed-off-by: Bjarni Ingi Gislason src/scripts/xzgrep.1 | 1 - 1 file changed, 1 deletion(-) commit 057839ca982f886387b66746bffe749cb14fd8cd Author: Bjarni Ingi Gislason Date: 2020-03-26 21:16:18 +0000 src/xz/xz.1: Correct misused two-fonts macros Output is from: test-groff -b -e -mandoc -T utf8 -rF0 -t -w w -z [ "test-groff" is a developmental version of "groff" ] Input file is ./src/xz/xz.1 :408 (macro BR): only 1 argument, but more are expected :1009 (macro BR): only 1 argument, but more are expected :1743 (macro BR): only 1 argument, but more are expected :1920 (macro BR): only 1 argument, but more are expected :2213 (macro BR): only 1 argument, but more are expected Output from nroff and troff is unchanged, except for a font change of a full stop (.). 
Signed-off-by: Bjarni Ingi Gislason src/xz/xz.1 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) commit b8e12f5ab4c9fd3cb09a4330b2861f6b979ababd Author: Lasse Collin Date: 2020-03-23 18:07:50 +0200 Typo fixes from fossies.org. https://fossies.org/linux/misc/xz-5.2.5.tar.xz/codespell.html Makefile.am | 2 +- doc/examples/01_compress_easy.c | 2 +- src/liblzma/api/lzma/base.h | 2 +- src/liblzma/check/crc32_x86.S | 2 +- src/liblzma/common/index.c | 2 +- src/xz/xz.1 | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) commit 869b9d1b4edd6df07f819d360d306251f8147353 Author: Lasse Collin Date: 2020-03-17 16:24:28 +0200 Update NEWS for 5.2.5. NEWS | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) commit a048e3a92d238c65f050a765174d9c75417231d4 Author: Lasse Collin Date: 2020-03-16 20:01:37 +0200 README: Update outdated sections. README | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) commit 29aed815ad4f98f3e4d355faa76a244ecd8ce716 Author: Lasse Collin Date: 2020-03-16 19:39:45 +0200 README: Mention that man pages can be translated. README | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) commit 7fa7653940cc9dcfcbce2fbc5166ea343ad4e3c1 Author: Lasse Collin Date: 2020-03-16 16:43:29 +0200 Update INSTALL.generic from Automake 1.16.1. INSTALL.generic | 321 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 162 insertions(+), 159 deletions(-) commit 9bd317ef03ab9b3e6a927c27c2e9c4ac041182f0 Author: Lasse Collin Date: 2020-03-15 15:27:22 +0200 Update INSTALL for Windows and DOS and add preliminary info for z/OS. INSTALL | 51 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 10 deletions(-) commit a3148c0446dc7fa96363752df414d22539c9007b Author: Lasse Collin Date: 2020-03-15 15:26:20 +0200 Build: Update m4/ax_pthread.m4 from Autoconf Archive (again). 
m4/ax_pthread.m4 | 219 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 117 insertions(+), 102 deletions(-) commit 7812002dd3ed319e42a14662a8531802cca8ca67 Author: Lasse Collin Date: 2020-03-11 21:15:35 +0200 xz: Never use thousand separators in DJGPP builds. DJGPP 2.05 added support for thousands separators but it's broken at least under WinXP with Finnish locale that uses a non-breaking space as the thousands separator. Workaround by disabling thousands separators for DJGPP builds. src/xz/util.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) commit 7c8f688bf7fccd65d396e0130cbf4ea5dff5c56f Author: Lasse Collin Date: 2020-03-11 19:38:08 +0200 DOS: Update dos/Makefile for DJGPP 2.05. It doesn't need -fgnu89-inline like 2.04beta did. dos/Makefile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) commit 319ca928d73de87940c54e30bffe69f9fa65efdf Author: Lasse Collin Date: 2020-03-11 19:36:07 +0200 DOS: Update instructions in dos/INSTALL.txt. dos/INSTALL.txt | 59 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 30 deletions(-) commit cb6b227ce39932824812ccd8a0647bd968de27d2 Author: Lasse Collin Date: 2020-03-11 17:58:51 +0200 DOS: Update config.h. The added defines assume GCC >= 4.8. dos/config.h | 8 ++++++++ 1 file changed, 8 insertions(+) commit 4572d53e16e87eee375bc5624de2fd59bb0ae9cd Author: Lasse Collin Date: 2020-03-02 13:54:33 +0200 liblzma: Fix a comment and RC_SYMBOLS_MAX. The comment didn't match the value of RC_SYMBOLS_MAX and the value itself was slightly larger than actually needed. The only harm about this was that memory usage was a few bytes larger. src/liblzma/rangecoder/range_encoder.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 265daa873c0d871f5f23f9b56e133a6f20045a0a Author: Lasse Collin Date: 2020-02-27 20:58:52 +0200 Build: Make CMake build fail if tuklib_cpucores or tuklib_physmem fails. 
CMakeLists.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) commit 7c8b904527cdbe61248c80edcc2e20d840c4fef9 Author: Lasse Collin Date: 2020-02-27 20:24:27 +0200 Build: Add support for --no-po4a option to autogen.sh. Normally, if po4a isn't available, autogen.sh will return with non-zero exit status. The option --no-po4a can be useful when one knows that po4a isn't available but wants autogen.sh to still return with zero exit status. autogen.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) commit 292a5c0f9c9b3a66f5a5c652dc46381836d4537f Author: Lasse Collin Date: 2020-02-25 21:35:14 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 474320e9908786ba2021035f9013191e16cde08a Author: Lasse Collin Date: 2020-02-25 20:42:31 +0200 Build: Fix bugs in the CMake files. Seems that the phrase "add more quotes" from sh/bash scripting applies to CMake as well. E.g. passing an unquoted list ${FOO} to a function that expects one argument results in only the first element of the list being passed as an argument and the rest get ignored. Adding quotes helps ("${FOO}"). list(INSERT ...) is weird. Inserting an empty string to an empty variable results in empty list, but inserting it to a non-empty variable does insert an empty element to the list. Since INSERT requires at least one element, "${CMAKE_THREAD_LIBS_INIT}" needs to be quoted in CMakeLists.txt. It might result in an empty element in the list. It seems to not matter as empty elements consistently get ignored in that variable. In fact, calling cmake_check_push_state() and cmake_check_pop_state() will strip the empty elements from CMAKE_REQUIRED_LIBRARIES! In addition to quoting fixes, this fixes checks for the cache variables in tuklib_cpucores.cmake and tuklib_physmem.cmake. Thanks to Martin Matuška for testing and reporting the problems. These fixes aren't tested yet but hopefully they soon will be. 
CMakeLists.txt | 52 ++++++++++++++++++++++----------------------- cmake/tuklib_common.cmake | 8 ++++--- cmake/tuklib_cpucores.cmake | 30 ++++++++++++++------------ cmake/tuklib_integer.cmake | 34 +++++++++++++++-------------- cmake/tuklib_mbstr.cmake | 6 +++--- cmake/tuklib_physmem.cmake | 29 +++++++++++++------------ cmake/tuklib_progname.cmake | 4 ++-- 7 files changed, 85 insertions(+), 78 deletions(-) commit 7e3493d40eac0c3fa3d5124097745a70e15c41f6 Author: Lasse Collin Date: 2020-02-24 23:38:16 +0200 Build: Add very limited experimental CMake support. This does *NOT* replace the Autotools-based build system in the foreseeable future. See the comment in the beginning of CMakeLists.txt. So far this has been tested only on GNU/Linux but I commit it anyway to make it easier for others to test. Since I haven't played much with CMake before, it's likely that there are things that have been done in a silly or wrong way and need to be fixed. CMakeLists.txt | 643 ++++++++++++++++++++++++++++++++++++++++++++ cmake/tuklib_common.cmake | 47 ++++ cmake/tuklib_cpucores.cmake | 173 ++++++++++++ cmake/tuklib_integer.cmake | 100 +++++++ cmake/tuklib_mbstr.cmake | 20 ++ cmake/tuklib_physmem.cmake | 149 ++++++++++ cmake/tuklib_progname.cmake | 19 ++ 7 files changed, 1151 insertions(+) commit 21bd4701fca3e9002ce78bc135debca369ed8545 Author: Lasse Collin Date: 2020-02-24 23:37:07 +0200 Update m4/.gitignore. m4/.gitignore | 1 + 1 file changed, 1 insertion(+) commit e094d1d0f196a91ec703e8d0055948feef349ae8 Author: Lasse Collin Date: 2020-02-24 23:29:35 +0200 tuklib: Omit an unneeded from a tests. tuklib_cpucores.c and tuklib_physmem.c don't include even via other files in this package, so clearly that header isn't needed in the tests either (no one has reported build problems due to a missing header in a .c file). 
m4/tuklib_cpucores.m4 | 1 - m4/tuklib_physmem.m4 | 1 - 2 files changed, 2 deletions(-) commit b3ed19a55fe99a45bd77614e149d39d18498075c Author: Lasse Collin Date: 2020-02-24 23:01:00 +0200 liblzma: Remove unneeded from fastpos_tablegen.c. This file only generates fastpos_table.c. It isn't built as a part of liblzma. src/liblzma/lzma/fastpos_tablegen.c | 1 - 1 file changed, 1 deletion(-) commit 7b8982b29179b3c586e0456dc9ecbd4f58dcea59 Author: Lasse Collin Date: 2020-02-22 14:15:07 +0200 Use defined(__GNUC__) before __GNUC__ in preprocessor lines. This should silence the equivalent of -Wundef in compilers that don't define __GNUC__. src/common/sysdefs.h | 3 ++- src/liblzma/api/lzma.h | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) commit 43dfe04e6209c691cf4fbe3072d4ee91271748f1 Author: Lasse Collin Date: 2020-02-21 17:40:02 +0200 liblzma: Add more uses of lzma_memcmplen() to the normal mode of LZMA. This gives a tiny encoder speed improvement. This could have been done in 2014 after the commit 544aaa3d13554e8640f9caf7db717a96360ec0f6 but it was forgotten. src/liblzma/lzma/lzma_encoder_optimum_normal.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) commit 59e6eb4840b9f52fa3a61544974017279b448216 Author: Lasse Collin Date: 2020-02-21 17:01:15 +0200 Build: Add visibility.m4 from gnulib. Appears that this file used to get included as a side effect of gettext. After the change to gettext version requirements this file no longer got copied to the package and so the build was broken. m4/.gitignore | 1 - m4/visibility.m4 | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) commit 7fe3ef2eaa53d439cec043727ea1998f4ff0e22a Author: Lasse Collin Date: 2020-02-21 16:10:44 +0200 xz: Silence a warning when sig_atomic_t is long int. It can be true at least on z/OS. 
src/xz/signals.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit b0a2a77d10940c42b449d47a005bfc2e50ab5db8 Author: Lasse Collin Date: 2020-02-21 15:59:26 +0200 xz: Avoid unneeded access of a volatile variable. src/xz/signals.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 524c2f12c762032b819757aeda8af7c47c4cabce Author: Lasse Collin Date: 2020-02-21 01:24:18 +0200 tuklib_integer.m4: Optimize the check order. The __builtin byteswapping is the preferred one so check for it first. m4/tuklib_integer.m4 | 56 +++++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 27 deletions(-) commit 57360bb4fd79b358b36d2877db26ac828d1fdfcb Author: Lasse Collin Date: 2020-02-20 18:54:04 +0200 tuklib_exit: Add missing header. strerror() needs which happened to be included via tuklib_common.h -> tuklib_config.h -> sysdefs.h if HAVE_CONFIG_H was defined. This wasn't tested without config.h before so it had worked fine. src/common/tuklib_exit.c | 1 + 1 file changed, 1 insertion(+) commit fddd31175e74a538997a939d930462fde17d2dd4 Author: Lasse Collin Date: 2020-02-18 19:12:35 +0200 Revert the previous commit and add a comment. The previous commit broke crc32_tablegen.c. If the whole package is built without config.h (with defines set on the compiler command line) this should still work fine as long as these headers conform to C99 well enough. src/common/tuklib_config.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) commit 4e4e9fbb7e66d45319525ac224bff48fbdd0cf6e Author: Lasse Collin Date: 2020-02-17 23:37:20 +0200 Do not check for HAVE_CONFIG_H in tuklib_config.h. In XZ Utils sysdefs.h takes care of it and the required headers. src/common/tuklib_config.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) commit 2d4cef954feba82073951358466a1d614141cf33 Author: Lasse Collin Date: 2020-02-16 11:18:28 +0200 sysdefs.h: Omit the conditionals around string.h and limits.h. 
string.h is used unconditionally elsewhere in the project and configure has always stopped if limits.h is missing, so these headers must have been always available even on the weirdest systems. src/common/sysdefs.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) commit feb9c1969bc3eb33d4ecb72cfa897f92dae84939 Author: Lasse Collin Date: 2020-02-15 15:07:11 +0200 Build: Bump Autoconf and Libtool version requirements. There is no specific reason for this other than blocking the most ancient versions. These are still old: Autoconf 2.69 (2012) Automake 1.12 (2012) gettext 0.19.6 (2015) Libtool 2.4 (2010) configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 3d576cf92158d62790017ad7f2dd6dc1dd6b42bb Author: Lasse Collin Date: 2020-02-15 03:08:32 +0200 Build: Use AM_GNU_GETTEXT_REQUIRE_VERSION and require 0.19.6. This bumps the version requirement from 0.19 (from 2014) to 0.19.6 (2015). Using only the old AM_GNU_GETTEXT_VERSION results in old gettext infrastructure being placed in the package. By using both macros we get the latest gettext files while the other programs in the Autotools family can still see the old macro. configure.ac | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit fa792b8befaf7cb3960b655e0a9410da866d756f Author: Lasse Collin Date: 2020-02-14 20:42:06 +0200 Translations: Add German translation of the man pages. Thanks to Mario Blättermann. po4a/de.po | 5532 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ po4a/po4a.conf | 2 +- 2 files changed, 5533 insertions(+), 1 deletion(-) commit 6f7211b6bb47a895b47f533282dba9ee9a1b0c8b Author: Lasse Collin Date: 2020-02-07 15:32:21 +0200 Build: Add support for translated man pages using po4a. The dependency on po4a is optional. It's never required to install the translated man pages when xz is built from a release tarball. If po4a is missing when building from xz.git, the translated man pages won't be generated but otherwise the build will work normally. 
The translations are only updated automatically by autogen.sh and by "make mydist". This makes it easy to keep po4a as an optional dependency and ensures that I won't forget to put updated translations to a release tarball. The translated man pages aren't installed if --disable-nls is used. The installation of translated man pages abuses Automake internals by calling "install-man" with redefined dist_man_MANS and man_MANS. This makes the hairy script code slightly less hairy. If it breaks some day, this code needs to be fixed; don't blame Automake developers. Also, this adds more quotes to the existing shell script code in the Makefile.am "-hook"s. Makefile.am | 4 ++++ autogen.sh | 8 ++++--- po4a/.gitignore | 2 ++ po4a/po4a.conf | 14 +++++++++++ po4a/update-po | 45 ++++++++++++++++++++++++++++++++++ src/scripts/Makefile.am | 64 +++++++++++++++++++++++++++++++++++++------------ src/xz/Makefile.am | 50 +++++++++++++++++++++++++++----------- src/xzdec/Makefile.am | 55 ++++++++++++++++++++++++++++++++---------- 8 files changed, 197 insertions(+), 45 deletions(-) commit 426f9e5819ff7710a5ff573a96c02940be65d52f Author: Lasse Collin Date: 2020-02-06 17:31:38 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit e3a4481d020e4de89efa037f335cf50f3ca55592 Author: Lasse Collin Date: 2020-02-05 22:35:06 +0200 Update tests/.gitignore. .gitignore | 4 ++++ 1 file changed, 4 insertions(+) commit 07208de92f2d5bca764f6d0ebe9d9866051dc4ef Author: Lasse Collin Date: 2020-02-05 22:28:51 +0200 Update m4/.gitignore. m4/.gitignore | 1 + 1 file changed, 1 insertion(+) commit c91fbf223db46c3b3cb9df769863a1a60cd9c908 Author: Lasse Collin Date: 2020-02-05 20:47:38 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 15a133b6d1a3eab4faf6eb52a71fdc56bd65846f Author: Lasse Collin Date: 2020-02-05 20:40:14 +0200 xz: Make it a fatal error if enabling the sandbox fails. 
Perhaps it's too drastic but on the other hand it will let me learn about possible problems if people report the errors. This won't be backported to the v5.2 branch. src/xz/file_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit af0fb386ef55db66654ae39e2deec6e04190c4ff Author: Lasse Collin Date: 2020-02-05 20:33:50 +0200 xz: Comment out annoying sandboxing messages. src/xz/file_io.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) commit 986d8c9b52b824474088e5bb3b6940651660f0e2 Author: Lasse Collin Date: 2020-02-05 19:33:37 +0200 Build: Workaround a POSIX shell detection problem on Solaris. I don't know if the problem is in gnulib's gl_POSIX_SHELL macro or if xzgrep does something that isn't in POSIX. The workaround adds a special case for Solaris: if /usr/xpg4/bin/sh exists and gl_cv_posix_shell wasn't overriden on the configure command line, use that shell for xzgrep and other scripts. That shell is known to work and exists on most Solaris systems. configure.ac | 10 ++++++++++ 1 file changed, 10 insertions(+) commit 6629ed929cc7d45a11e385f357ab58ec15e7e4ad Author: Lasse Collin Date: 2020-02-03 22:03:50 +0200 Build: Update m4/ax_pthread.m4 from Autoconf Archive. m4/ax_pthread.m4 | 398 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 279 insertions(+), 119 deletions(-) commit 353970510895f6a80adfe60cf71b70a95adfa8bc Author: Lasse Collin Date: 2020-02-01 19:56:18 +0200 xz: Limit --memlimit-compress to at most 4020 MiB for 32-bit xz. See the code comment for reasoning. It's far from perfect but hopefully good enough for certain cases while hopefully doing nothing bad in other situations. At presets -5 ... -9, 4020 MiB vs. 4096 MiB makes no difference on how xz scales down the number of threads. The limit has to be a few MiB below 4096 MiB because otherwise things like "xz --lzma2=dict=500MiB" won't scale down the dict size enough and xz cannot allocate enough memory. 
With "ulimit -v $((4096 * 1024))" on x86-64, the limit in xz had to be no more than 4085 MiB. Some safety margin is good though. This is a hack but it should be useful when running 32-bit xz on a 64-bit kernel that gives full 4 GiB address space to xz. Hopefully this is enough to solve this: https://bugzilla.redhat.com/show_bug.cgi?id=1196786 FreeBSD has a patch that limits the result in tuklib_physmem() to SIZE_MAX on 32-bit systems. While I think it's not the way to do it, the results on --memlimit-compress have been good. This commit should achieve practically identical results for compression while leaving decompression and tuklib_physmem() and thus lzma_physmem() unaffected. src/xz/hardware.c | 32 +++++++++++++++++++++++++++++++- src/xz/xz.1 | 21 ++++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) commit ba76d67585f88677af9f48b48e7bdc3bb7687def Author: Lasse Collin Date: 2020-01-26 20:53:25 +0200 xz: Set the --flush-timeout deadline when the first input byte arrives. xz --flush-timeout=2000, old version: 1. xz is started. The next flush will happen after two seconds. 2. No input for one second. 3. A burst of a few kilobytes of input. 4. No input for one second. 5. Two seconds have passed and flushing starts. The first second counted towards the flush-timeout even though there was no pending data. This can cause flushing to occur more often than needed. xz --flush-timeout=2000, after this commit: 1. xz is started. 2. No input for one second. 3. A burst of a few kilobytes of input. The next flush will happen after two seconds counted from the time when the first bytes of the burst were read. 4. No input for one second. 5. No input for another second. 6. Two seconds have passed and flushing starts.
src/xz/coder.c | 6 +----- src/xz/file_io.c | 6 +++++- src/xz/mytime.c | 1 - 3 files changed, 6 insertions(+), 7 deletions(-) commit fd47fd62bbb1bfd13ab63869137971d8b390025f Author: Lasse Collin Date: 2020-01-26 20:19:19 +0200 xz: Move flush_needed from mytime.h to file_pair struct in file_io.h. src/xz/coder.c | 3 ++- src/xz/file_io.c | 3 ++- src/xz/file_io.h | 3 +++ src/xz/mytime.c | 3 --- src/xz/mytime.h | 4 ---- 5 files changed, 7 insertions(+), 9 deletions(-) commit 815035681063d5774d3640fc20b8ede783dd574e Author: Lasse Collin Date: 2020-01-26 14:49:22 +0200 xz: coder.c: Make writing output a separate function. The same code sequence repeats so it's nicer as a separate function. Note that in one case there was no test for opt_mode != MODE_TEST, but that was only because that condition would always be true, so this commit doesn't change the behavior there. src/xz/coder.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) commit 5a49e081a098455bcdbd95cefb90e9b18780fe58 Author: Lasse Collin Date: 2020-01-26 14:13:42 +0200 xz: Fix semi-busy-waiting in xz --flush-timeout. When input blocked, xz --flush-timeout=1 would wake up every millisecond and initiate flushing which would have nothing to flush and thus would just waste CPU time. The fix disables the timeout when no input has been seen since the previous flush. src/xz/coder.c | 4 ++++ src/xz/file_io.c | 15 +++++++++++---- src/xz/file_io.h | 4 ++++ 3 files changed, 19 insertions(+), 4 deletions(-) commit dcca70fe9fa3c4bec56cf9c79e966166c9a9cf6a Author: Lasse Collin Date: 2020-01-26 13:47:31 +0200 xz: Refactor io_read() a bit. src/xz/file_io.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) commit 4ae9ab70cd3214395756435d13d8d000368ca2cb Author: Lasse Collin Date: 2020-01-26 13:37:08 +0200 xz: Update a comment in file_io.h. 
src/xz/file_io.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) commit 3333ba4a6795a55cf0375329ba08152bd7fcbd46 Author: Lasse Collin Date: 2020-01-26 13:27:51 +0200 xz: Move the setting of flush_needed in file_io.c to a nicer location. src/xz/file_io.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) commit cf2df0f05ac98c1158c6e48145900b773223605d Author: Lasse Collin Date: 2020-01-19 21:54:33 +0200 Use $(LIB_FUZZING_ENGINE) in tests/ossfuzz/Makefile. https://github.com/google/oss-fuzz/pull/3219#issuecomment-573751048 Thanks to Bhargava Shastry for sending the patch. tests/ossfuzz/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 7136f1735c60ac6967c4b8e277fcde53d485234f Author: Lasse Collin Date: 2019-12-31 00:41:28 +0200 Rename unaligned_read32ne to read32ne, and similarly for the others. src/common/tuklib_integer.h | 64 +++++++++++++++---------------- src/liblzma/common/alone_encoder.c | 2 +- src/liblzma/common/block_header_decoder.c | 2 +- src/liblzma/common/block_header_encoder.c | 2 +- src/liblzma/common/memcmplen.h | 9 ++--- src/liblzma/common/stream_flags_decoder.c | 6 +-- src/liblzma/common/stream_flags_encoder.c | 8 ++-- src/liblzma/lz/lz_encoder_hash.h | 2 +- src/liblzma/lzma/lzma_decoder.c | 2 +- src/liblzma/lzma/lzma_encoder.c | 2 +- src/liblzma/lzma/lzma_encoder_private.h | 3 +- src/liblzma/simple/simple_decoder.c | 2 +- src/liblzma/simple/simple_encoder.c | 2 +- tests/test_block_header.c | 4 +- tests/test_stream_flags.c | 6 +-- 15 files changed, 54 insertions(+), 62 deletions(-) commit 5e78fcbf2eb21936022c9c5c3625d4da76f4b241 Author: Lasse Collin Date: 2019-12-31 00:29:48 +0200 Rename read32ne to aligned_read32ne, and similarly for the others. Using the aligned methods requires more care to ensure that the address really is aligned, so it's nicer if the aligned methods are prefixed. 
The next commit will remove the unaligned_ prefix from the unaligned methods which in liblzma are used in more places than the aligned ones. src/common/tuklib_integer.h | 56 +++++++++++++++++++++--------------------- src/liblzma/check/crc32_fast.c | 4 +-- src/liblzma/check/crc64_fast.c | 4 +-- 3 files changed, 32 insertions(+), 32 deletions(-) commit 77bc5bc6dd67056cfd5888520ac930cfc57b4516 Author: Lasse Collin Date: 2019-12-31 00:18:24 +0200 Revise tuklib_integer.h and .m4. Add a configure option --enable-unsafe-type-punning to get the old non-conforming memory access methods. It can be useful with old compilers or in some other less typical situations but shouldn't normally be used. Omit the packed struct trick for unaligned access. While it's best in some cases, this is simpler. If the memcpy trick doesn't work, one can request unsafe type punning from configure. Because CRC32/CRC64 code needs fast aligned reads, if no very safe way to do it is found, type punning is used as a fallback. This sucks but since it currently works in practice, it seems to be the least bad option. It's never needed with GCC >= 4.7 or Clang >= 3.6 since these support __builtin_assume_aligned and thus fast aligned access can be done with the memcpy trick. Other things: - Support GCC/Clang __builtin_bswapXX - Cleaner bswap fallback macros - Minor cleanups m4/tuklib_integer.m4 | 46 ++++- src/common/tuklib_integer.h | 488 ++++++++++++++++++++++++-------------------- 2 files changed, 316 insertions(+), 218 deletions(-) commit 8b72950a6b2e2a36c2d8fdc8857564b57191b088 Author: Lasse Collin Date: 2019-12-29 22:51:58 +0200 Tests: Hopefully fix test_check.c to work on EBCDIC systems. Thanks to Daniel Richard G. tests/test_check.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) commit 43ce4ea7c762238d3df9717b34126d3e0d7cd51c Author: Lasse Collin Date: 2019-09-24 23:02:40 +0300 Scripts: Put /usr/xpg4/bin to the beginning of PATH on Solaris. 
This adds a configure option --enable-path-for-scripts=PREFIX which defaults to empty except on Solaris it is /usr/xpg4/bin to make POSIX grep and others available. The Solaris case had been documented in INSTALL with a manual fix but it's better to do this automatically since it is needed on most Solaris systems anyway. Thanks to Daniel Richard G. INSTALL | 43 +++++++++++++++++++++++++++++++++++-------- configure.ac | 26 ++++++++++++++++++++++++++ src/scripts/xzdiff.in | 1 + src/scripts/xzgrep.in | 1 + src/scripts/xzless.in | 1 + src/scripts/xzmore.in | 1 + 6 files changed, 65 insertions(+), 8 deletions(-) commit 6a89e656ebedd53a10cd1a063a32a9e4ade0da1f Author: Lasse Collin Date: 2019-07-12 18:57:43 +0300 Fix comment typos in tuklib_mbstr* files. src/common/tuklib_mbstr.h | 2 +- src/common/tuklib_mbstr_fw.c | 2 +- src/common/tuklib_mbstr_width.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) commit ac0b4212656a48ef0c187c0c941d40ac9489ae36 Author: Lasse Collin Date: 2019-07-12 18:30:46 +0300 Add missing include to tuklib_mbstr_width.c. It didn't matter in XZ Utils because sysdefs.h includes string.h anyway. src/common/tuklib_mbstr_width.c | 1 + 1 file changed, 1 insertion(+) commit 72a443281fb0b91aebf8cdff2ab1f7c07b081240 Author: Lasse Collin Date: 2019-07-12 18:10:57 +0300 Update tuklib base headers to include stdbool.h. src/common/tuklib_common.h | 2 +- src/common/tuklib_config.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) commit de1f47b2b40e960b7bc3acba754f66dd19705921 Author: Lasse Collin Date: 2019-06-28 00:54:31 +0300 xz: Automatically align the strings in --info-memory. This makes it easier to translate the strings. Also, the string for amount of RAM was shortened. src/xz/hardware.c | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) commit 8ce679125dbd0e2058d8f886e738d7f19a45cab5 Author: Lasse Collin Date: 2019-06-25 23:15:21 +0300 liblzma: Fix a buggy comment. 
src/liblzma/lz/lz_encoder_mf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit fc77929e92e869f6869bf88931066103fd75f376 Author: Lasse Collin Date: 2019-06-25 00:16:06 +0300 configure.ac: Fix a typo in a comment. configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit e873902641794210ad7db59743f98e3e1cd6139f Author: Lasse Collin Date: 2019-06-25 00:08:13 +0300 Tests: Silence warnings from clang -Wassign-enum. Also changed 999 to 99 so it fits even if lzma_check happened to be 8 bits wide. tests/test_block_header.c | 3 ++- tests/test_stream_flags.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) commit d499e467d99efeaae688564eedc4548837c1416a Author: Lasse Collin Date: 2019-06-24 23:52:17 +0300 liblzma: Add a comment. src/liblzma/common/stream_encoder_mt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit a12b13c5f0d54c684fa8446f93fdac08ab2a716b Author: Lasse Collin Date: 2019-06-24 23:45:21 +0300 liblzma: Silence clang -Wmissing-variable-declarations. src/liblzma/check/crc32_table.c | 3 +++ src/liblzma/check/crc64_table.c | 3 +++ 2 files changed, 6 insertions(+) commit 1b4675cebf7471f7cc9b7072c950e3de97147063 Author: Lasse Collin Date: 2019-06-24 23:25:41 +0300 Add LZMA_RET_INTERNAL1..8 to lzma_ret and use one for LZMA_TIMED_OUT. LZMA_TIMED_OUT is *internally* used as a value for lzma_ret enumeration. Previously it was #defined to 32 and cast to lzma_ret. That way it wasn't visible in the public API, but this was hackish. Now the public API has eight LZMA_RET_INTERNALx members and LZMA_TIMED_OUT is #defined to LZMA_RET_INTERNAL1. This way the code is cleaner overall although the public API has a few extra mysterious enum members. 
src/liblzma/api/lzma/base.h | 15 ++++++++++++++- src/liblzma/common/common.c | 4 +--- src/liblzma/common/common.h | 5 ++--- src/xz/message.c | 8 ++++++++ 4 files changed, 25 insertions(+), 7 deletions(-) commit 159c43875eb25deea626ed651274464bae3e32ef Author: Lasse Collin Date: 2019-06-24 22:57:43 +0300 xz: Silence a warning from clang -Wsign-conversion in main.c. src/xz/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 466cfcd3e52f6750ce28a635997f3dd84fb18515 Author: Lasse Collin Date: 2019-06-24 22:52:20 +0300 xz: Make "headings" static in list.c. Caught by clang -Wmissing-variable-declarations. src/xz/list.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 608517b9b76c41fac6613dbda1193d6f41338e19 Author: Lasse Collin Date: 2019-06-24 22:47:39 +0300 liblzma: Remove incorrect uses of lzma_attribute((__unused__)). Caught by clang -Wused-but-marked-unused. src/liblzma/common/alone_decoder.c | 3 +-- src/liblzma/common/alone_encoder.c | 3 +-- src/liblzma/lz/lz_decoder.c | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) commit c2d2ab6a9d41a2b55d047c5b710aacf80d219255 Author: Lasse Collin Date: 2019-06-24 20:53:55 +0300 Tests: Silence a warning from -Wsign-conversion. tests/create_compress_files.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) commit 2402f7873dcae719d0ebddd23bb579074519ac52 Author: Lasse Collin Date: 2019-06-24 20:45:49 +0300 xz: Fix an integer overflow with 32-bit off_t. Or any off_t which isn't very big (like signed 64 bit integer that most systems have). A small off_t could overflow if the file being decompressed had long enough run of zero bytes, which would result in corrupt output. src/xz/file_io.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) commit 4fd3a8dd0b60f029e1c66a0ee634f9e9fda3caa9 Author: Lasse Collin Date: 2019-06-24 01:24:17 +0300 xz: Cleanup io_seek_src() a bit. lseek() returns -1 on error and checking for -1 is nicer.
src/xz/file_io.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) commit dfda7cf6afa486e10df035327d68753896dfb48a Author: Lasse Collin Date: 2019-06-24 00:57:23 +0300 Tests: Remove a duplicate branch from tests/tests.h. The duplication was introduced about eleven years ago and should have been cleaned up back then already. This was caught by -Wduplicated-branches. tests/tests.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) commit 1d4a904d8fb634bd5a04f7fbdd17d3739f3d8866 Author: Lasse Collin Date: 2019-06-24 00:40:45 +0300 xz: Change io_seek_src and io_pread arguments from off_t to uint64_t. This helps fixing warnings from -Wsign-conversion and makes the code look better too. src/xz/file_io.c | 16 ++++++++++++---- src/xz/file_io.h | 4 ++-- src/xz/list.c | 9 ++++----- 3 files changed, 18 insertions(+), 11 deletions(-) commit 50120deb0159fcb53ee1a6caffb2bb81a1ecd990 Author: Lasse Collin Date: 2019-06-24 00:12:38 +0300 xz: list.c: Fix some warnings from -Wsign-conversion. src/xz/list.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) commit d0a78751eb54fb1572002746c533936a118e4e42 Author: Lasse Collin Date: 2019-06-23 23:22:45 +0300 tuklib_mbstr_width: Fix a warning from -Wsign-conversion. src/common/tuklib_mbstr_width.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 7883d73530b4b2a701ddd7d50c35676cbc158039 Author: Lasse Collin Date: 2019-06-23 23:19:34 +0300 xz: Fix some of the warnings from -Wsign-conversion. src/xz/args.c | 4 ++-- src/xz/coder.c | 4 ++-- src/xz/file_io.c | 5 +++-- src/xz/message.c | 4 ++-- src/xz/mytime.c | 4 ++-- src/xz/options.c | 2 +- src/xz/util.c | 4 ++-- 7 files changed, 14 insertions(+), 13 deletions(-) commit c2b994fe3d35e9e575c28869a2f7f534f2495d05 Author: Lasse Collin Date: 2019-06-23 22:27:45 +0300 tuklib_cpucores: Silence warnings from -Wsign-conversion. 
src/common/tuklib_cpucores.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) commit 07c4fa9e1a195e0543f271380c8de22a3ab145ff Author: Lasse Collin Date: 2019-06-23 21:40:47 +0300 xzdec: Fix warnings from -Wsign-conversion. src/xzdec/xzdec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit dfac2c9a1d7d4a2b8a5d7c9c6d567dee48318bcf Author: Lasse Collin Date: 2019-06-23 21:38:56 +0300 liblzma: Fix warnings from -Wsign-conversion. Also, more parentheses were added to the literal_subcoder macro in lzma_common.h (better style but no functional change in the current usage). src/liblzma/common/block_header_decoder.c | 2 +- src/liblzma/delta/delta_decoder.c | 2 +- src/liblzma/lzma/fastpos.h | 2 +- src/liblzma/lzma/lzma2_decoder.c | 8 ++++---- src/liblzma/lzma/lzma_common.h | 3 ++- src/liblzma/lzma/lzma_decoder.c | 16 ++++++++-------- src/liblzma/simple/arm.c | 6 +++--- src/liblzma/simple/armthumb.c | 8 ++++---- src/liblzma/simple/ia64.c | 2 +- src/liblzma/simple/powerpc.c | 9 +++++---- src/liblzma/simple/x86.c | 2 +- 11 files changed, 31 insertions(+), 29 deletions(-) commit 41838dcc26375f6aa393a63e4d81e2f4d223de07 Author: Lasse Collin Date: 2019-06-23 19:33:55 +0300 tuklib_integer: Silence warnings from -Wsign-conversion. src/common/tuklib_integer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit 3ce05d235f736d392347a05086b8033416874b87 Author: Lasse Collin Date: 2019-06-20 19:40:30 +0300 tuklib_integer: Fix usage of conv macros. Use a temporary variable instead of e.g. conv32le(unaligned_read32ne(buf)) because the macro can evaluate its argument multiple times. src/common/tuklib_integer.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) commit b525b0c0ef40cd89b69294c9b8d57f4a8db58e1f Author: Lasse Collin Date: 2019-06-03 20:44:19 +0300 Update THANKS.
THANKS | 1 + 1 file changed, 1 insertion(+) commit 039a168e8cf201d5104a25ec41f0cf25eda6cc53 Author: Lasse Collin Date: 2019-06-03 20:41:54 +0300 liblzma: Fix comments. Thanks to Bruce Stark. src/liblzma/common/alone_encoder.c | 4 ++-- src/liblzma/common/block_util.c | 2 +- src/liblzma/common/common.c | 2 +- src/liblzma/common/filter_common.h | 2 +- src/liblzma/common/filter_decoder.h | 2 +- src/liblzma/common/filter_flags_encoder.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) commit c460f6defebc5a81bbca90adc2476154ca244f69 Author: Lasse Collin Date: 2019-06-02 00:50:59 +0300 liblzma: Fix one more unaligned read to use unaligned_read16ne(). src/liblzma/lz/lz_encoder_hash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit c81d77c537f0b8c8672868e1dc6cf7290ce4a25b Author: Lasse Collin Date: 2019-06-01 21:41:55 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 386394fc9fcde2615391f804eaa466749f96f4ef Author: Lasse Collin Date: 2019-06-01 21:36:13 +0300 liblzma: memcmplen: Use ctz32() from tuklib_integer.h. The same compiler-specific #ifdefs are already in tuklib_integer.h src/liblzma/common/memcmplen.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) commit 264ab971ce2994baac41b1579c9c35aba7743fc8 Author: Lasse Collin Date: 2019-06-01 21:30:03 +0300 tuklib_integer: Cleanup MSVC-specific code. src/common/tuklib_integer.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) commit 33773c6f2a8711d4aa6656795db52c59a28580ec Author: Lasse Collin Date: 2019-06-01 19:01:21 +0300 liblzma: Use unaligned_readXXne functions instead of type punning. Now gcc -fsanitize=undefined should be clean. Thanks to Jeffrey Walton. 
src/liblzma/common/memcmplen.h | 12 ++++++------ src/liblzma/lzma/lzma_encoder_private.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) commit e5f13a66567b1987e0aae42c6fdcd277bb5810ba Author: Lasse Collin Date: 2019-06-01 18:46:54 +0300 tuklib_integer: Autodetect support for unaligned access on ARM. The result is used as the default for --enable-unaligned-access. The test should work with GCC and Clang. m4/tuklib_integer.m4 | 11 +++++++++++ 1 file changed, 11 insertions(+) commit 3bc112c2d38d5f348bce7bc2422286b1692c7490 Author: Lasse Collin Date: 2019-06-01 18:41:16 +0300 tuklib_integer: Improve unaligned memory access. Now memcpy() or GNU C packed structs are used for unaligned access instead of type punning. See the comment in this commit for details. Avoiding type punning with unaligned access is needed to silence gcc -fsanitize=undefined. New functions: unaligned_readXXne and unaligned_writeXXne where XX is 16, 32, or 64. src/common/tuklib_integer.h | 180 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 168 insertions(+), 12 deletions(-) commit 2a22de439ec63da1927b640eda309296a1e8dce5 Author: Lasse Collin Date: 2019-05-13 20:05:17 +0300 liblzma: Avoid memcpy(NULL, foo, 0) because it is undefined behavior. I should have always known this but I didn't. Here is an example as a reminder to myself: int mycopy(void *dest, void *src, size_t n) { memcpy(dest, src, n); return dest == NULL; } In the example, a compiler may assume that dest != NULL because passing NULL to memcpy() would be undefined behavior. Testing with GCC 8.2.1, mycopy(NULL, NULL, 0) returns 1 with -O0 and -O1. With -O2 the return value is 0 because the compiler infers that dest cannot be NULL because it was already used with memcpy() and thus the test for NULL gets optimized out.
In liblzma, if a null-pointer was passed to memcpy(), there were no checks for NULL *after* the memcpy() call, so I cautiously suspect that it shouldn't have caused bad behavior in practice, but it's hard to be sure, and the problematic cases had to be fixed anyway. Thanks to Jeffrey Walton. src/liblzma/common/common.c | 6 +++++- src/liblzma/lz/lz_decoder.c | 12 +++++++++--- src/liblzma/simple/simple_coder.c | 10 +++++++++- 3 files changed, 23 insertions(+), 5 deletions(-) commit d3fc850cfedc058247d9e334ce59bbc8f2286d8a Author: Lasse Collin Date: 2019-05-11 20:56:08 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 4adb8288ab61d5f14e212007b8742df0710baf73 Author: Lasse Collin Date: 2019-05-11 20:54:12 +0300 xz: Update xz man page date. src/xz/xz.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 2fb0ddaa557ce86e38fe06439930fa8665f092fd Author: Antoine Cœur Date: 2019-05-08 13:30:57 +0800 spelling Doxyfile.in | 2 +- NEWS | 2 +- doc/examples/11_file_info.c | 2 +- src/liblzma/api/lzma/block.h | 2 +- src/liblzma/api/lzma/hardware.h | 2 +- src/liblzma/api/lzma/lzma12.h | 2 +- src/liblzma/api/lzma/vli.h | 2 +- src/liblzma/common/file_info.c | 4 ++-- src/liblzma/common/hardware_physmem.c | 2 +- src/liblzma/common/index.c | 4 ++-- src/liblzma/common/stream_encoder_mt.c | 2 +- src/liblzma/common/vli_decoder.c | 2 +- src/liblzma/lz/lz_decoder.c | 2 +- src/scripts/xzgrep.in | 2 +- src/xz/args.c | 2 +- src/xz/coder.c | 4 ++-- src/xz/list.c | 4 ++-- src/xz/main.c | 2 +- src/xz/mytime.h | 2 +- src/xz/private.h | 2 +- src/xz/xz.1 | 2 +- windows/build.bash | 2 +- 22 files changed, 26 insertions(+), 26 deletions(-) commit 4ed339606156bd313ed99237485cb8ed0362d64f Author: Lasse Collin Date: 2019-05-01 18:43:10 +0300 xz: In xz -lvv look at the widths of the check names too. Now the widths of the check names is used to adjust the width of the Check column. 
This way there no longer is a need to restrict the widths of the check names to be at most ten terminal-columns. src/xz/list.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) commit 2f4281a1001dcf7fdf1418c0c0d246c16561fb65 Author: Lasse Collin Date: 2019-05-01 18:33:25 +0300 xz: Fix xz -lvv column alignment to look at the translated strings. src/xz/list.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 01d01b7c7c0b8eaf7f780a5584ec52c22d10fa4a Author: Lasse Collin Date: 2019-05-01 16:52:36 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 64030c6b17f7743df03a9948a0ccfcdf40c6b97c Author: Lasse Collin Date: 2019-05-01 16:43:16 +0300 Windows: Update VS version in windows/vs2019/config.h. windows/vs2019/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 2dc9117f5fbfab31444a3ca1e55795ccfa8a9f51 Author: Julien Marrec Date: 2019-04-25 17:44:06 +0200 Windows: Upgrade solution itself windows/vs2019/xz_win.sln | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) commit ac31413916fa9b11bab17f0f0aa63e2869360f6f Author: Julien Marrec Date: 2019-04-25 17:40:24 +0200 Windows: Upgrade solution with VS2019 windows/vs2019/liblzma.vcxproj | 15 ++++++++------- windows/vs2019/liblzma_dll.vcxproj | 15 ++++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) commit be25a0c37ba92a20c390b4d17fe502457fe96b71 Author: Julien Marrec Date: 2019-04-25 17:39:32 +0200 Windows: Duplicate windows/vs2017 before upgrading windows/vs2019/config.h | 148 ++++++++++++++ windows/vs2019/liblzma.vcxproj | 356 ++++++++++++++++++++++++++++++++++ windows/vs2019/liblzma_dll.vcxproj | 385 +++++++++++++++++++++++++++++++++++++ windows/vs2019/xz_win.sln | 48 +++++ 4 files changed, 937 insertions(+) commit d0e58b3a51e8e616f3dc26ec7b7e4aa0fa6991ad Author: Lasse Collin Date: 2019-03-04 22:49:04 +0200 README: Update translation instructions. XZ Utils is now part of the Translation Project . 
README | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) commit a750c35a7d45a16c11c1d40fecee8443c32a9996 Author: Lasse Collin Date: 2019-03-04 21:20:39 +0200 xz: Automatically align column headings in xz -lvv. src/xz/list.c | 263 ++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 212 insertions(+), 51 deletions(-) commit 6cb42e8aa1dc37bf403a9f5acbd07e86036b7e77 Author: Lasse Collin Date: 2019-03-04 01:07:59 +0200 xz: Automatically align strings ending in a colon in --list output. This should avoid alignment errors in translations with these strings. src/xz/list.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 102 insertions(+), 12 deletions(-) commit 1e3f29b62f2c03e50fc9ebea7b83c1497dd35484 Author: Lasse Collin Date: 2019-01-13 17:29:23 +0200 Windows/VS2017: Omit WindowsTargetPlatformVersion from project files. I understood that if a WTPV is specified, it's often wrong because different VS installations have different SDK version installed. Omitting the WTPV tag makes VS2017 default to Windows SDK 8.1 which often is also missing, so in any case people may need to specify the WTPV before building. But some day in the future a missing WTPV tag will start to default to the latest installed SDK which sounds reasonable: https://developercommunity.visualstudio.com/content/problem/140294/windowstargetplatformversion-makes-it-impossible-t.html Thanks to "dom". windows/INSTALL-MSVC.txt | 4 ++++ windows/vs2017/liblzma.vcxproj | 1 - windows/vs2017/liblzma_dll.vcxproj | 1 - 3 files changed, 4 insertions(+), 2 deletions(-) commit 4d86076332aece6314063d3712a5f364172bbb0f Author: Lasse Collin Date: 2018-12-20 20:42:29 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit b55d79461d1f6aeaac03c7dae84481e5eb8bea4c Author: Lasse Collin Date: 2018-12-14 20:34:30 +0200 xz: Fix a crash in progress indicator when in passthru mode. 
"xz -dcfv not_an_xz_file" crashed (all four options are required to trigger it). It caused xz to call lzma_get_progress(&strm, ...) when no coder was initialized in strm. In this situation strm.internal is NULL which leads to a crash in lzma_get_progress(). The bug was introduced when xz started using lzma_get_progress() to get progress info for multi-threaded compression, so the bug is present in versions 5.1.3alpha and higher. Thanks to Filip Palian for the bug report. src/xz/coder.c | 11 +++++++---- src/xz/message.c | 18 ++++++++++++++++-- src/xz/message.h | 3 ++- 3 files changed, 25 insertions(+), 7 deletions(-) commit 4ae5526de013efd1021686fa80bdd10cf1cb9c56 Author: Lasse Collin Date: 2018-11-22 17:20:31 +0200 xz: Update man page timestamp. src/xz/xz.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 6a36d0d5f49e0080ff64dd9ef250abd489bea2ad Author: Pavel Raiskup Date: 2018-11-22 15:14:34 +0100 'have have' typos src/xz/signals.c | 2 +- src/xz/xz.1 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 9eca51ef805ed8002a851df1b4995d71826c8b6e Author: Lasse Collin Date: 2018-11-02 20:40:48 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 106d1a663d4ba42b63231caa289f531548df81c1 Author: Lasse Collin Date: 2018-11-02 20:18:45 +0200 Tests: Add a fuzz test program and a config file for OSS-Fuzz. Thanks to Bhargava Shastry and Github user pdknsk. tests/Makefile.am | 1 + tests/ossfuzz/Makefile | 7 ++++ tests/ossfuzz/config/fuzz.dict | 2 + tests/ossfuzz/config/fuzz.options | 2 + tests/ossfuzz/fuzz.c | 82 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 94 insertions(+) commit a18ae42a79a19b1394b41eb3e238139fd28012ec Author: Lasse Collin Date: 2018-10-26 22:49:10 +0300 liblzma: Don't verify header CRC32s if building for fuzz testing. FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION is #defined when liblzma is being built for fuzz testing. 
Most fuzzed inputs would normally get rejected because of incorrect CRC32 and the actual header decoding code wouldn't get fuzzed. Disabling CRC32 checks avoids this problem. The fuzzer program must still use LZMA_IGNORE_CHECK flag to disable verification of integrity checks of uncompressed data. src/liblzma/common/block_header_decoder.c | 5 ++++- src/liblzma/common/index_decoder.c | 5 ++++- src/liblzma/common/index_hash.c | 5 ++++- src/liblzma/common/stream_flags_decoder.c | 10 ++++++++-- 4 files changed, 20 insertions(+), 5 deletions(-) commit f76f7516d6a1c832f61810c82e92d151cc80966c Author: Lasse Collin Date: 2018-07-27 18:10:44 +0300 xzless: Rename unused variables to silence static analysers. In this particular case I don't see this affecting readability of the code. Thanks to Pavel Raiskup. src/scripts/xzless.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 3cbcaeb07eb7543735befd6f507fdb5fa4363cff Author: Lasse Collin Date: 2018-07-27 16:02:58 +0300 liblzma: Remove an always-true condition from lzma_index_cat(). This should help static analysis tools to see that newg isn't leaked. Thanks to Pavel Raiskup. src/liblzma/common/index.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 76762ae6098ec55c326f4b4b4a42e8c1918ee81f Author: Lasse Collin Date: 2018-05-19 21:23:25 +0300 liblzma: Improve lzma_properties_decode() API documentation. src/liblzma/api/lzma/filter.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) commit 2267f5b0d20a5d24e93fcd9f72ea7eeb0d89708c Author: Lasse Collin Date: 2018-04-29 18:58:19 +0300 Bump the version number to 5.3.1alpha. src/liblzma/api/lzma/version.h | 2 +- src/liblzma/liblzma.map | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit cee3021d30704858e4bdd22240e7d28e570d7451 Author: Lasse Collin Date: 2018-04-29 18:48:00 +0300 extra/scanlzma: Fix compiler warnings. 
extra/scanlzma/scanlzma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit c5c7ceb08a011b97d261798033e2c39613a69eb7 Author: Lasse Collin Date: 2018-04-29 18:44:47 +0300 DOS: Add file_info.c to the list of files to build. dos/Makefile | 1 + 1 file changed, 1 insertion(+) commit 114cab97af766b21e0fc8620479202fb1e7a5e41 Author: Lasse Collin Date: 2018-04-29 18:33:10 +0300 Update NEWS for 5.3.1alpha. NEWS | 11 +++++++++++ 1 file changed, 11 insertions(+) commit b8139e11c512bbf32bf58ab0689f9bb6c52819da Author: Lasse Collin Date: 2018-04-29 18:15:37 +0300 Add NEWS for 5.2.4. NEWS | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) commit 47b59d47cfd904a420fbd45629d168ca1973721d Author: Lasse Collin Date: 2018-02-06 19:36:30 +0200 Update THANKS. THANKS | 2 ++ 1 file changed, 2 insertions(+) commit bc197991690ede24ab143665b5b0f0f9cb35cc46 Author: Ben Boeckel Date: 2018-01-29 13:58:18 -0500 nothrow: use noexcept for C++11 and newer In C++11, the `throw()` specifier is deprecated and `noexcept` is preferred instead. src/liblzma/api/lzma.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit fb6d4f83cb6e144734f2a4216bb117bd56dc3cb5 Author: Lasse Collin Date: 2018-02-06 18:02:48 +0200 liblzma: Remove incorrect #ifdef from range_common.h. In most cases it was harmless but it could affect some custom build systems. Thanks to Pippijn van Steenhoven. src/liblzma/rangecoder/range_common.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) commit bc577d35c2d0ed17f554d2d8107b2a2a9abbac76 Author: Lasse Collin Date: 2018-01-10 22:10:39 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 713bbc1a80f26d34c96ed3dbb9887362204de3a1 Author: Lasse Collin Date: 2018-01-10 21:54:27 +0200 tuklib_integer: New Intel C compiler needs immintrin.h. Thanks to Melanie Blower (Intel) for the patch.
src/common/tuklib_integer.h | 11 +++++++++++ 1 file changed, 11 insertions(+) commit a0ee1afbd99da138b559cb27fa2022e7f1ab44f3 Author: Lasse Collin Date: 2017-09-24 20:04:24 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit a1e2c568de29c0b57d873eab40a2879b749da429 Author: Lasse Collin Date: 2017-09-16 20:36:20 +0300 Windows: Fix paths in VS project files. Some paths use slashes instead of backslashes as directory separators... now it should work (I tested VS2013 version). windows/vs2013/liblzma.vcxproj | 12 ++++++------ windows/vs2013/liblzma_dll.vcxproj | 24 ++++++++++++------------ windows/vs2017/liblzma.vcxproj | 12 ++++++------ windows/vs2017/liblzma_dll.vcxproj | 24 ++++++++++++------------ 4 files changed, 36 insertions(+), 36 deletions(-) commit cea5cf8d26c9d1dc30a808614d79c0b25640e15e Author: Lasse Collin Date: 2017-09-16 12:56:20 +0300 Windows: Update VS2017 project files to include file info decoder. windows/vs2017/liblzma.vcxproj | 2 ++ windows/vs2017/liblzma_dll.vcxproj | 2 ++ 2 files changed, 4 insertions(+) commit 95d563db3ee497b223e522b699c4d4c29943eef0 Author: Lasse Collin Date: 2017-09-16 12:54:23 +0300 Windows: Add project files for VS2017. These files match the v5.2 branch (no file info decoder). windows/vs2017/config.h | 148 ++++++++++++++ windows/vs2017/liblzma.vcxproj | 355 ++++++++++++++++++++++++++++++++++ windows/vs2017/liblzma_dll.vcxproj | 384 +++++++++++++++++++++++++++++++++++++ windows/vs2017/xz_win.sln | 48 +++++ 4 files changed, 935 insertions(+) commit ab72416d62ea8f50ad31d5b8545fcb6a2bf96b73 Author: Lasse Collin Date: 2017-09-16 12:45:50 +0300 Windows: Update VS2013 project files to include file info decoder. windows/vs2013/liblzma.vcxproj | 2 ++ windows/vs2013/liblzma_dll.vcxproj | 2 ++ 2 files changed, 4 insertions(+) commit 82388980187b0e3794d187762054200bbdcc9a53 Author: Lasse Collin Date: 2017-09-16 12:39:43 +0300 Windows: Move VS2013 files into windows/vs2013 directory. 
windows/{ => vs2013}/config.h | 0 windows/{ => vs2013}/liblzma.vcxproj | 278 +++++++++++++++--------------- windows/{ => vs2013}/liblzma_dll.vcxproj | 280 +++++++++++++++---------------- windows/{ => vs2013}/xz_win.sln | 0 4 files changed, 279 insertions(+), 279 deletions(-) commit 94e3f986aa4e14b4ff01ac24857f499630d6d180 Author: Lasse Collin Date: 2017-08-14 20:08:33 +0300 Fix or hide warnings from GCC 7's -Wimplicit-fallthrough. src/liblzma/lzma/lzma_decoder.c | 6 ++++++ src/xz/list.c | 2 ++ 2 files changed, 8 insertions(+) commit 0b0e1e6803456aac641a59332200f8e95e2b7ea8 Author: Alexey Tourbin Date: 2017-05-16 23:56:35 +0300 Docs: Fix a typo in a comment in doc/examples/02_decompress.c. doc/examples/02_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit a015cd1f90116e655be4eaf4aad42c4c911c2807 Author: Lasse Collin Date: 2017-05-23 18:34:43 +0300 xz: Fix "xz --list --robot missing_or_bad_file.xz". It ended up printing an uninitialized char-array when trying to print the check names (column 7) on the "totals" line. This also changes the column 12 (minimum xz version) to 50000002 (xz 5.0.0) instead of 0 when there are no valid input files. Thanks to kidmin for the bug report. src/xz/list.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) commit c2e29f06a7d1e3ba242ac2fafc69f5d6e92f62cd Author: Lasse Collin Date: 2017-04-24 20:20:11 +0300 Docs: Add doc/examples/11_file_info.c. doc/examples/11_file_info.c | 206 ++++++++++++++++++++++++++++++++++++++++++++ doc/examples/Makefile | 3 +- 2 files changed, 208 insertions(+), 1 deletion(-) commit 1520f6ec808896375ac7bf778c449e0f7dea5f46 Author: Lasse Collin Date: 2017-04-24 19:48:47 +0300 Build: Omit pre-5.0.0 entries from the generated ChangeLog. It makes ChangeLog significantly smaller. 
Makefile.am | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 8269782283806c90a8509c2ac2a308344f70e171 Author: Lasse Collin Date: 2017-04-24 19:48:23 +0300 xz: Use lzma_file_info_decoder() for --list. src/xz/list.c | 254 ++++++++++------------------------------------------------ 1 file changed, 44 insertions(+), 210 deletions(-) commit e353d0b1cc0d3997ae5048faa8e6786414953e06 Author: Lasse Collin Date: 2017-04-24 19:35:50 +0300 liblzma: Add lzma_file_info_decoder(). src/liblzma/api/lzma/index.h | 66 ++++ src/liblzma/common/Makefile.inc | 1 + src/liblzma/common/file_info.c | 855 ++++++++++++++++++++++++++++++++++++++++ src/liblzma/liblzma.map | 7 +- 4 files changed, 928 insertions(+), 1 deletion(-) commit 144ef9e19e9496c995b21505dd1e111c442968d1 Author: Lasse Collin Date: 2017-04-24 19:30:22 +0300 Update the Git repository URL to HTTPS in ChangeLog. ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 8c9842c265993d7dd4039f732d3546267fb5ecc4 Author: Lasse Collin Date: 2017-04-21 15:05:16 +0300 liblzma: Rename LZMA_SEEK to LZMA_SEEK_NEEDED and seek_in to seek_pos. src/liblzma/api/lzma/base.h | 18 +++++++++--------- src/liblzma/common/common.c | 2 +- src/xz/message.c | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) commit 662b27c417cab248cb365dd7682121bdec4d5ae7 Author: Lasse Collin Date: 2017-04-19 22:17:35 +0300 Update the home page URLs to HTTPS. COPYING | 2 +- README | 2 +- configure.ac | 2 +- doc/faq.txt | 4 ++-- dos/config.h | 2 +- src/common/common_w32res.rc | 2 +- src/xz/xz.1 | 6 +++--- src/xzdec/xzdec.1 | 4 ++-- windows/README-Windows.txt | 2 +- windows/config.h | 2 +- 10 files changed, 14 insertions(+), 14 deletions(-) commit c28f0b3d00af87b92dda229831548d8eb0067d1d Author: Lasse Collin Date: 2017-04-05 18:47:22 +0300 xz: Add io_seek_src(). 
src/xz/file_io.c | 20 +++++++++++++++++--- src/xz/file_io.h | 13 +++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) commit bba477257d7319c8764890f3669175b866d24944 Author: Lasse Collin Date: 2017-03-30 22:01:54 +0300 xz: Use POSIX_FADV_RANDOM in "xz --list" mode. xz --list is random access so POSIX_FADV_SEQUENTIAL was clearly wrong. src/xz/file_io.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) commit 310d19816d1652b0c8bb1b82574d46345d924752 Author: Lasse Collin Date: 2017-03-30 20:03:05 +0300 liblzma: Make lzma_index_decoder_init() visible to other liblzma funcs. This is to allow other functions to use it without going via the public API (lzma_index_decoder()). src/liblzma/common/Makefile.inc | 1 + src/liblzma/common/index_decoder.c | 10 +++++----- src/liblzma/common/index_decoder.h | 24 ++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) commit a27920002dbc469f778a134fc665b7c3ea73701b Author: Lasse Collin Date: 2017-03-30 20:00:09 +0300 liblzma: Add generic support for input seeking (LZMA_SEEK). Also mention LZMA_SEEK in xz/message.c to silence a warning. src/liblzma/api/lzma/base.h | 31 ++++++++++++++++++++++++++++++- src/liblzma/common/common.c | 12 +++++++++++- src/xz/message.c | 1 + 3 files changed, 42 insertions(+), 2 deletions(-) commit a0b1dda409bc3e6e2957a2651663fc411d2caf2d Author: Lasse Collin Date: 2017-03-30 19:47:45 +0300 liblzma: Fix lzma_memlimit_set(strm, 0). The 0 got treated specially in a buggy way and as a result the function did nothing. The API doc said that 0 was supposed to return LZMA_PROG_ERROR but it didn't. Now 0 is treated as if 1 had been specified. This is done because 0 is already used to indicate an error from lzma_memlimit_get() and lzma_memusage(). In addition, lzma_memlimit_set() no longer checks that the new limit is at least LZMA_MEMUSAGE_BASE. It's counter-productive for the Index decoder and was actually needed only by the auto decoder.
Auto decoder has now been modified to check for LZMA_MEMUSAGE_BASE. src/liblzma/api/lzma/base.h | 7 ++++++- src/liblzma/common/auto_decoder.c | 3 +++ src/liblzma/common/common.c | 6 ++++-- 3 files changed, 13 insertions(+), 3 deletions(-) commit 84462afaada61379f5878e46f8f00e25a1cdcf29 Author: Lasse Collin Date: 2017-03-30 19:16:55 +0300 liblzma: Similar memlimit fix for stream_, alone_, and auto_decoder. src/liblzma/api/lzma/container.h | 21 +++++++++++++++++---- src/liblzma/common/alone_decoder.c | 5 +---- src/liblzma/common/auto_decoder.c | 5 +---- src/liblzma/common/stream_decoder.c | 5 +---- 4 files changed, 20 insertions(+), 16 deletions(-) commit cbc74017939690d13441b8926bb743fb03211b83 Author: Lasse Collin Date: 2017-03-30 18:58:18 +0300 liblzma: Fix handling of memlimit == 0 in lzma_index_decoder(). It returned LZMA_PROG_ERROR, which was done to avoid zero as the limit (because it's a special value elsewhere), but using LZMA_PROG_ERROR is simply inconvenient and can cause bugs. The fix/workaround is to treat 0 as if it were 1 byte. It's effectively the same thing. The only weird consequence is that then lzma_memlimit_get() will return 1 even when 0 was specified as the limit. This fixes a very rare corner case in xz --list where a specific memory usage limit and a multi-stream file could print the error message "Internal error (bug)" instead of saying that the memory usage limit is too low. src/liblzma/api/lzma/index.h | 18 +++++++++++------- src/liblzma/common/index_decoder.c | 4 ++-- 2 files changed, 13 insertions(+), 9 deletions(-) commit 78ae13bced912b1b92ae927992c99cbcc463cae7 Author: Lasse Collin Date: 2016-12-30 13:25:10 +0200 Update NEWS for 5.2.3. NEWS | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) commit 0297863fdb453aed1a25eb025f3ba7bacbbb1357 Author: Lasse Collin Date: 2016-12-26 20:55:52 +0200 Document --enable-sandbox configure option in INSTALL. 
INSTALL | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) commit d4a0462abe5478193521c14625e1c81fead87f9f Author: Lasse Collin Date: 2016-11-21 20:24:50 +0200 liblzma: Avoid multiple definitions of lzma_coder structures. Only one definition was visible in a translation unit. It avoided a few casts and temp variables but seems that this hack doesn't work with link-time optimizations in compilers as it's not C99/C11 compliant. Fixes: http://www.mail-archive.com/xz-devel@tukaani.org/msg00279.html src/liblzma/common/alone_decoder.c | 44 +++++---- src/liblzma/common/alone_encoder.c | 34 ++++--- src/liblzma/common/auto_decoder.c | 35 ++++--- src/liblzma/common/block_decoder.c | 41 ++++---- src/liblzma/common/block_encoder.c | 40 ++++---- src/liblzma/common/common.h | 18 ++-- src/liblzma/common/index_decoder.c | 33 ++++--- src/liblzma/common/index_encoder.c | 16 ++-- src/liblzma/common/stream_decoder.c | 50 +++++----- src/liblzma/common/stream_encoder.c | 56 ++++++----- src/liblzma/common/stream_encoder_mt.c | 124 ++++++++++++++----------- src/liblzma/delta/delta_common.c | 25 ++--- src/liblzma/delta/delta_decoder.c | 6 +- src/liblzma/delta/delta_encoder.c | 12 ++- src/liblzma/delta/delta_private.h | 4 +- src/liblzma/lz/lz_decoder.c | 60 ++++++------ src/liblzma/lz/lz_decoder.h | 13 ++- src/liblzma/lz/lz_encoder.c | 57 +++++++----- src/liblzma/lz/lz_encoder.h | 9 +- src/liblzma/lzma/lzma2_decoder.c | 32 ++++--- src/liblzma/lzma/lzma2_encoder.c | 51 +++++----- src/liblzma/lzma/lzma_decoder.c | 27 +++--- src/liblzma/lzma/lzma_encoder.c | 29 +++--- src/liblzma/lzma/lzma_encoder.h | 9 +- src/liblzma/lzma/lzma_encoder_optimum_fast.c | 3 +- src/liblzma/lzma/lzma_encoder_optimum_normal.c | 23 ++--- src/liblzma/lzma/lzma_encoder_private.h | 6 +- src/liblzma/simple/arm.c | 2 +- src/liblzma/simple/armthumb.c | 2 +- src/liblzma/simple/ia64.c | 2 +- src/liblzma/simple/powerpc.c | 2 +- src/liblzma/simple/simple_coder.c | 61 ++++++------ src/liblzma/simple/simple_private.h 
| 12 +-- src/liblzma/simple/sparc.c | 2 +- src/liblzma/simple/x86.c | 15 +-- 35 files changed, 532 insertions(+), 423 deletions(-) commit a01794c52add98263b49119842c3e7141d1b9ced Author: Lasse Collin Date: 2016-10-24 18:53:25 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit df8f446e3ad47e5148b8c8d8b6e519d3ce29cb9d Author: Lasse Collin Date: 2016-10-24 18:51:36 +0300 tuklib_cpucores: Add support for sched_getaffinity(). It's available in glibc (GNU/Linux, GNU/kFreeBSD). It's better than sysconf(_SC_NPROCESSORS_ONLN) because sched_getaffinity() gives the number of cores available to the process instead of the total number of cores online. As a side effect, this commit fixes a bug on GNU/kFreeBSD where configure would detect the FreeBSD-specific cpuset_getaffinity() but it wouldn't actually work because on GNU/kFreeBSD it requires using -lfreebsd-glue when linking. Now the glibc-specific function will be used instead. Thanks to Sebastian Andrzej Siewior for the original patch and testing. m4/tuklib_cpucores.m4 | 30 +++++++++++++++++++++++++++++- src/common/tuklib_cpucores.c | 9 +++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) commit 446e4318fa79788e09299d5953b5dd428953d14b Author: Lasse Collin Date: 2016-06-30 20:27:36 +0300 xz: Fix copying of timestamps on Windows. xz used to call utime() on Windows, but its result gets lost on close(). Using _futime() seems to work. Thanks to Martok for reporting the bug: http://www.mail-archive.com/xz-devel@tukaani.org/msg00261.html configure.ac | 2 +- src/xz/file_io.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) commit 1b0ac0c53c761263e91e34195cb21dfdcfeac0bd Author: Lasse Collin Date: 2016-06-16 22:46:02 +0300 xz: Silence warnings from -Wlogical-op. Thanks to Evan Nemerson. 
src/xz/file_io.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) commit c83b7a03342c3325ff10400b22ee21edfcd1e026 Author: Lasse Collin Date: 2016-04-10 20:55:49 +0300 Build: Fix = to += for xz_SOURCES in src/xz/Makefile.am. Thanks to Christian Kujau. src/xz/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit ade31a2bfb95c94d05fbfc0ecbba5d6377f2506e Author: Lasse Collin Date: 2016-04-10 20:54:17 +0300 Build: Bump GNU Gettext version requirement to 0.19. It silences a few warnings and most people probably have 0.19 even on stable distributions. Thanks to Christian Kujau. configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit ac398c3bafa6e4c80e20571373a96947db863b3d Author: Lasse Collin Date: 2016-03-13 20:21:49 +0200 liblzma: Disable external SHA-256 by default. This is the sane thing to do. The conflict with OpenSSL on some OSes and especially that the OS-provided versions can be significantly slower makes it clear that it was a mistake to have the external SHA-256 support enabled by default. Those who want it can now pass --enable-external-sha256 to configure. INSTALL was updated with notes about OSes where this can be a bad idea. The SHA-256 detection code in configure.ac had some bugs that could lead to a build failure in some situations. These were fixed, although it doesn't matter that much now that the external SHA-256 is disabled by default. MINIX >= 3.2.0 uses NetBSD's libc and thus has SHA256_Init in libc instead of libutil. Support for the libutil version was removed. INSTALL | 36 ++++++++++++++++++++++ configure.ac | 76 +++++++++++++++++++++++------------------------ src/liblzma/check/check.h | 16 ++++------ 3 files changed, 79 insertions(+), 49 deletions(-) commit 6fd5ecb589a9fdd7a576ea48c4850d496bab9ce5 Author: Lasse Collin Date: 2016-03-10 20:27:05 +0200 Update THANKS. 
THANKS | 1 + 1 file changed, 1 insertion(+) commit 473ef0dc69a30e64d5fa0d34aca02f7309faa3e9 Author: Lasse Collin Date: 2016-03-10 20:26:49 +0200 Build: Avoid SHA256_Init on FreeBSD and MINIX 3. On FreeBSD 10 and older, SHA256_Init from libmd conflicts with libcrypto from OpenSSL. The OpenSSL version has different sizeof(SHA256_CTX) and it can cause weird problems if wrong SHA256_Init gets used. Looking at the source, MINIX 3 seems to have a similar issue but I'm not sure. To be safe, I disabled SHA256_Init on MINIX 3 too. NetBSD has SHA256_Init in libc and they had a similar problem, but they already fixed it in 2009. Thanks to Jim Wilcoxson for the bug report that helped in finding the problem. configure.ac | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) commit faf302137e54d605b44ecf0373cb51a6403a2de1 Author: Lasse Collin Date: 2015-11-08 20:16:10 +0200 tuklib_physmem: Hopefully silence a warning on Windows. src/common/tuklib_physmem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit e52e9151cf8613022d1de4712ff39dbcb666e991 Author: Lasse Collin Date: 2015-11-04 23:17:43 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 14115f84a38161d55eaa2d070f08739bde37e966 Author: Lasse Collin Date: 2015-11-04 23:14:00 +0200 liblzma: Make Valgrind happier with optimized (gcc -O2) liblzma. When optimizing, GCC can reorder code so that an uninitialized value gets used in a comparison, which makes Valgrind unhappy. It doesn't happen when compiled with -O0, which I tend to use when running Valgrind. Thanks to Rich Prohaska. I remember this being mentioned long ago by someone else but nothing was done back then. src/liblzma/lz/lz_encoder.c | 4 ++++ 1 file changed, 4 insertions(+) commit f4c95ba94beb71a608eb6eadbf82b44f53a0260e Author: Lasse Collin Date: 2015-11-03 20:55:45 +0200 liblzma: Rename lzma_presets.c back to lzma_encoder_presets.c. 
It would be too annoying to update other build systems just because of this. src/liblzma/lzma/Makefile.inc | 2 +- src/liblzma/lzma/{lzma_presets.c => lzma_encoder_presets.c} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit c7bc20a6f3e71920871d48db31a79ab58b5a0a4b Author: Lasse Collin Date: 2015-11-03 20:47:07 +0200 Build: Disable xzdec, lzmadec, and lzmainfo when they cannot be built. They all need decoder support and if that isn't available, there's no point trying to build them. configure.ac | 3 +++ 1 file changed, 3 insertions(+) commit 5cbca1205deeb6fb7afe7a864fa68a57466d928a Author: Lasse Collin Date: 2015-11-03 20:35:19 +0200 Build: Simplify $enable_{encoders,decoders} usage a bit. configure.ac | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) commit af13781886c8e7a0aabebb5141ea282dc364f5c6 Author: Lasse Collin Date: 2015-11-03 20:31:31 +0200 Windows/MSVC: Update config.h. windows/config.h | 6 ++++++ 1 file changed, 6 insertions(+) commit 9fa5949330f162c5a2f6653f83025327837e8f39 Author: Lasse Collin Date: 2015-11-03 20:29:58 +0200 DOS: Update config.h. dos/config.h | 6 ++++++ 1 file changed, 6 insertions(+) commit cb3111e3ed84152912b5138d690c8d9f00c6ef02 Author: Lasse Collin Date: 2015-11-03 20:29:33 +0200 xz: Make xz buildable even when encoders or decoders are disabled. The patch is quite long but it's mostly about adding new #ifdefs to omit code when encoders or decoders have been disabled. This adds two new #defines to config.h: HAVE_ENCODERS and HAVE_DECODERS. configure.ac | 4 ++++ src/xz/Makefile.am | 8 ++++++-- src/xz/args.c | 16 ++++++++++++++++ src/xz/coder.c | 33 +++++++++++++++++++++++++-------- src/xz/main.c | 9 +++++++-- src/xz/private.h | 5 ++++- 6 files changed, 62 insertions(+), 13 deletions(-) commit 4cc584985c0b7a13901da1b7a64ef9f7cc36e8ab Author: Lasse Collin Date: 2015-11-03 18:06:40 +0200 Build: Build LZMA1/2 presets also when only decoder is wanted. 
People shouldn't rely on the presets when decoding raw streams, but xz uses the presets as the starting point for raw decoder options anyway. lzma_encoder_presets.c was renamed to lzma_presets.c to make it clear it's not used solely by the encoder code. src/liblzma/lzma/Makefile.inc | 6 +++++- src/liblzma/lzma/{lzma_encoder_presets.c => lzma_presets.c} | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) commit 23ed1d41489f632155bbc9660f323d57e09da180 Author: Lasse Collin Date: 2015-11-03 17:54:48 +0200 Build: Fix configure to handle LZMA1 dependency with LZMA2. Now it gives an error if LZMA1 encoder/decoder is missing when LZMA2 encoder/decoder was requested. Even better would be LZMA2 implicitly enabling LZMA1 but it would need more code. configure.ac | 5 ----- 1 file changed, 5 deletions(-) commit b0bc3e03852af13419ea2960881824258d451117 Author: Lasse Collin Date: 2015-11-03 17:41:54 +0200 Build: Don't omit lzma_cputhreads() unless using --disable-threads. Previously it was omitted if encoders were disabled with --disable-encoders. It didn't make sense and it also broke the build. src/liblzma/common/Makefile.inc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) commit c6bf438ab39e0fb4a47d3c81725c227919502726 Author: Lasse Collin Date: 2015-11-02 18:16:51 +0200 liblzma: Fix a build failure related to external SHA-256 support. If an appropriate header and structure were found by configure, but a library with usable SHA-256 functions wasn't, the build failed. src/liblzma/check/check.h | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) commit e18adc56f2262aa9394d46681e9e4b9981ed5e97 Author: Lasse Collin Date: 2015-11-02 15:19:10 +0200 xz: Always close the file before trying to delete it. unlink() can return EBUSY in errno for open files on some operating systems and file systems.
src/xz/file_io.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) commit 282e768a1484e88c8b7ec35655ee4959954ec87a Author: Lasse Collin Date: 2015-10-12 21:08:42 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 372e402713a1d4337ffce5f56d5c5c9ed99a66d0 Author: Lasse Collin Date: 2015-10-12 21:07:41 +0300 Tests: Add tests for the two bugs fixed in index.c. tests/test_index.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) commit 21515d79d778b8730a434f151b07202d52a04611 Author: Lasse Collin Date: 2015-10-12 20:45:15 +0300 liblzma: Fix lzma_index_dup() for empty Streams. Stream Flags and Stream Padding weren't copied from empty Streams. src/liblzma/common/index.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) commit 09f395b6b360c0b13e8559eece1d179b908ebd3a Author: Lasse Collin Date: 2015-10-12 20:31:44 +0300 liblzma: Add a note to index.c for those using static analyzers. src/liblzma/common/index.c | 3 +++ 1 file changed, 3 insertions(+) commit 3bf857edfef51374f6f3fffae3d817f57d3264a0 Author: Lasse Collin Date: 2015-10-12 20:29:09 +0300 liblzma: Fix a memory leak in error path of lzma_index_dup(). lzma_index_dup() calls index_dup_stream() which, in case of an error, calls index_stream_end() to free memory allocated by index_stream_init(). However, it illogically didn't actually free the memory. To make it logical, the tree handling code was modified a bit in addition to changing index_stream_end(). Thanks to Evan Nemerson for the bug report. src/liblzma/common/index.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) commit 7f05803979b4b79642d5be4218a79da7a0b12c47 Author: Lasse Collin Date: 2015-09-29 13:57:28 +0300 Update NEWS for 5.2.2. 
NEWS | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) commit 397fcc0946315b55c3c6d80e37e82a2a78bc15c1 Author: Hauke Henningsen Date: 2015-08-17 04:59:54 +0200 Update German translation, mostly wrt orthography Provide an update of the German translation. * A lot of compound words were previously written with spaces, while German orthography is relatively clear in that the components should not be separated. * When referring to the actual process of (de)compression rather than the concept, replace “(De-)Kompression” with “(De-)Komprimierung”. Previously, both forms were used in this context and are now used in a manner consistent with “Komprimierung” being more likely to refer to a process. * Consistently translate “standard input”/“output” * Use “Zeichen” instead of false friend “Charakter” for “character” * Insert commas around relative clauses (as required in German) * Some other minor corrections * Capitalize “ß” as “ẞ” * Consistently start option descriptions in --help with capital letters Acked-By: Andre Noll * Update after msgmerge po/de.po | 383 ++++++++++++++++++++++++++++++++------------------------------- 1 file changed, 196 insertions(+), 187 deletions(-) commit cbc9e39bae715accb44168930a71888480aad569 Author: Lasse Collin Date: 2015-08-11 13:23:04 +0300 Build: Minor Cygwin cleanup. Some tests used "cygwin*" and some used "cygwin". I changed them all to use "cygwin". Shouldn't affect anything in practice. configure.ac | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit bcacd8ce7a031566858e5e03c1009064c3f1c89e Author: Lasse Collin Date: 2015-08-11 13:21:52 +0300 Build: Support building of MSYS2 binaries. configure.ac | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) commit 0275a5398c01d57b724dec7fea52dec3bd6edc6c Author: Lasse Collin Date: 2015-08-09 21:06:26 +0300 Windows: Define DLL_EXPORT when building liblzma.dll with MSVC. src/liblzma/common/common.h uses it to set __declspec(dllexport) for the API symbols. 
Thanks to Adam Walling. windows/liblzma_dll.vcxproj | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) commit a74525cf9b945fb0b370e64cf406104beb31729b Author: Lasse Collin Date: 2015-08-09 21:02:20 +0300 Windows: Omit unneeded header files from MSVC project files. windows/liblzma.vcxproj | 5 ----- windows/liblzma_dll.vcxproj | 5 ----- 2 files changed, 10 deletions(-) commit fbbb295a91caf39faf8838c8c39526e4cb4dc121 Author: Lasse Collin Date: 2015-07-12 20:48:19 +0300 liblzma: A MSVC-specific hack isn't needed with MSVC 2013 and newer. src/liblzma/api/lzma.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) commit 713dbe5c230fe00865a54f5c32358ea30f9a1156 Author: Lasse Collin Date: 2015-06-19 20:38:55 +0300 Update THANKS. THANKS | 2 ++ 1 file changed, 2 insertions(+) commit 3a5d755d055d51f99c523b4c2952727e1e69cfa1 Author: Lasse Collin Date: 2015-06-19 20:21:30 +0300 Windows: Update the docs. INSTALL | 29 ++++++++----- windows/INSTALL-MSVC.txt | 47 ++++++++++++++++++++++ windows/{INSTALL-Windows.txt => INSTALL-MinGW.txt} | 2 +- 3 files changed, 67 insertions(+), 11 deletions(-) commit b0798c6aa6184efcefd0bdcca20f96121a13feda Author: Lasse Collin Date: 2015-06-19 17:25:31 +0300 Windows: Add MSVC project files for building liblzma. Thanks to Adam Walling for creating these files. windows/liblzma.vcxproj | 359 ++++++++++++++++++++++++++++++++++++++++ windows/liblzma_dll.vcxproj | 388 ++++++++++++++++++++++++++++++++++++++++++++ windows/xz_win.sln | 48 ++++++ 3 files changed, 795 insertions(+) commit 9b02a4ffdac1b9f066658ec4c95c0834f4cd2fb7 Author: Andre Noll Date: 2015-05-28 15:50:00 +0200 Fix typo in German translation. As pointed out by Robert Pollak, there's a typo in the German translation of the compression preset option (-0 ... -9) help text. "The compressor" translates to "der Komprimierer", and the genitive form is "des Komprimierers". The old word makes no sense at all. 
po/de.po | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit c7f4041f6b8f4729f88d3bc888b2a4080ae51f72 Author: Lasse Collin Date: 2015-05-13 20:57:55 +0300 Tests: Fix a memory leak in test_bcj_exact_size. Thanks to Cristian Rodríguez. tests/test_bcj_exact_size.c | 1 + 1 file changed, 1 insertion(+) commit 17b29d4f0ae0f780fbd69e15a398dc478d8492f8 Author: Lasse Collin Date: 2015-05-12 18:08:24 +0300 Fix NEWS about threading in 5.2.0. Thanks to Andy Hochhaus. NEWS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 49c26920d6e2d85e5c6123e34958aed2e77485ad Author: Lasse Collin Date: 2015-05-11 21:26:16 +0300 xz: Document that threaded decompression hasn't been implemented yet. src/xz/xz.1 | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) commit 5b2458cb244ed237efe4de1ebcf06e1b3a1f4256 Author: Lasse Collin Date: 2015-04-20 20:20:29 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 6bd0349c58451b13442e8f463e35de83548bf985 Author: Lasse Collin Date: 2015-04-20 19:59:18 +0300 Revert "xz: Use pipe2() if available." This reverts commit 7a11c4a8e5e15f13d5fa59233b3172e65428efdd. It is a problem when libc has pipe2() but the kernel is too old to have pipe2() and thus pipe2() fails. In xz it's pointless to have a fallback for non-functioning pipe2(); it's better to avoid pipe2() completely. Thanks to Michael Fox for the bug report. configure.ac | 4 ++-- src/xz/file_io.c | 9 +-------- 2 files changed, 3 insertions(+), 10 deletions(-) commit fc0df0f8db87dff45543708a711e17d29c37f632 Author: Lasse Collin Date: 2015-04-01 14:45:25 +0300 xz: Fix the Capsicum rights on user_abort_pipe. src/xz/file_io.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit 57393615b31b3788dd77280452d845bcc12d33af Author: Lasse Collin Date: 2015-03-31 22:20:11 +0300 Update THANKS. 
THANKS | 1 + 1 file changed, 1 insertion(+) commit 1238381143a9a7ce84839c2582ccd56ff750a440 Author: Lasse Collin Date: 2015-03-31 22:19:34 +0300 xz: Add support for sandboxing with Capsicum. The sandboxing is used conditionally as described in main.c. This isn't optimal but it was much easier to implement than a full sandboxing solution and it still covers the most common use cases where xz is writing to standard output. This should have practically no effect on performance even with small files as fork() isn't needed. C and locale libraries can open files as needed. This has been fine in the past, but it's a problem with things like Capsicum. io_sandbox_enter() tries to ensure that various locale-related files have been loaded before cap_enter() is called, but it's possible that there are other similar problems which haven't been seen yet. Currently Capsicum is available on FreeBSD 10 and later and there is a port to Linux too. Thanks to Loganaden Velvindron for help. configure.ac | 41 +++++++++++++++++++++++++++ src/xz/Makefile.am | 2 +- src/xz/file_io.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/xz/file_io.h | 6 ++++ src/xz/main.c | 18 ++++++++++++ src/xz/private.h | 4 +++ 6 files changed, 151 insertions(+), 1 deletion(-) commit 29a087fb5a0c879f0b1bc4c6b989f7b87bacdf9e Author: Lasse Collin Date: 2015-03-31 21:12:30 +0300 Fix bugs and otherwise improve ax_check_capsicum.m4. AU_ALIAS was removed because the new version is incompatible with the old version. It no longer checks for <sys/capability.h> separately. It's enough to test for it as part of AC_CHECK_DECL. The defines HAVE_CAPSICUM_SYS_CAPSICUM_H and HAVE_CAPSICUM_SYS_CAPABILITY_H were removed as unneeded. HAVE_SYS_CAPSICUM_H from AC_CHECK_HEADERS is enough. It no longer does a useless search for the Capsicum library if the header wasn't found. Fixed a bug in ACTION-IF-FOUND (the first argument). Specifying the argument omitted the default action but the given action wasn't used instead.
AC_DEFINE([HAVE_CAPSICUM]) is now always called when Capsicum support is found. Previously it was part of the default ACTION-IF-FOUND which a custom action would override. Now the default action only prepends ${CAPSICUM_LIB} to LIBS. The documentation was updated. Since there was no serial number, "#serial 2" was added. m4/ax_check_capsicum.m4 | 103 ++++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 52 deletions(-) commit 6e845c6a3eddf2fde9db5a29950421dff60a43ac Author: Lasse Collin Date: 2015-03-31 19:20:24 +0300 Add m4/ax_check_capsicum.m4 for detecting Capsicum support. The file was loaded from this web page: https://github.com/google/capsicum-test/blob/dev/autoconf/m4/ax_check_capsicum.m4 Thanks to Loganaden Velvindron for pointing it out for me. m4/ax_check_capsicum.m4 | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) commit 3717885f9ef2c06f1bcbad9f4c2ed2d5695f844e Author: Lasse Collin Date: 2015-03-30 22:44:02 +0300 Bump version to 5.3.0alpha and soname to 5.3.99. The idea of 99 is that it looks a bit weird in this context. For new features there's no API/ABI stability in devel versions. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) commit eccd8155e107c5ada03d13e7730675cdf1a44ddc Author: Lasse Collin Date: 2015-03-29 22:14:47 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 25263fd9e7a8a913395cb93d7c104cd48c2b4a00 Author: Lasse Collin Date: 2015-03-29 22:13:48 +0300 Fix the detection of installed RAM on QNX. The earlier version compiled but didn't actually work since sysconf(_SC_PHYS_PAGES) always fails (or so I was told). Thanks to Ole André Vadla Ravnås for the patch and testing.
m4/tuklib_physmem.m4 | 6 +++--- src/common/tuklib_physmem.c | 14 +++++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) commit 4c544d2410903d38402221cb783ed85585b6a007 Author: Lasse Collin Date: 2015-03-27 22:39:07 +0200 Fix CPU core count detection on QNX. It tried to use sysctl() on QNX but - it broke the build because sysctl() needs -lsocket on QNX; - sysctl() doesn't work for detecting the core count on QNX even if it compiled. sysconf() works. An alternative would have been to use QNX-specific SYSPAGE_ENTRY(num_cpu) from <sys/syspage.h>. Thanks to Ole André Vadla Ravnås. m4/tuklib_cpucores.m4 | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) commit e0ea6737b03e83ccaff4514d00e31bb926f8f0f3 Author: Lasse Collin Date: 2015-03-07 22:05:57 +0200 xz: size_t/uint32_t cleanup in options.c. src/xz/options.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) commit 8bcca29a65335fd679c13814b70b35b68fa5daed Author: Lasse Collin Date: 2015-03-07 22:04:23 +0200 xz: Fix a comment and silence a warning in message.c. src/xz/message.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) commit f243f5f44c6b19a7c289a0ec73a03ee08364cb5b Author: Lasse Collin Date: 2015-03-07 22:01:00 +0200 liblzma: Silence more uint32_t vs. size_t warnings. src/liblzma/lz/lz_encoder.c | 2 +- src/liblzma/lzma/lzma_encoder.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 7f0a4c50f4a374c40acf4b86848f301ad1e82d34 Author: Lasse Collin Date: 2015-03-07 19:54:00 +0200 xz: Make arg_count an unsigned int to silence a warning. Actually the value of arg_count cannot exceed INT_MAX but it's nicer as an unsigned int. src/xz/args.h | 2 +- src/xz/main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit f6ec46801588b1be29c07c9db98558b521304002 Author: Lasse Collin Date: 2015-03-07 19:33:17 +0200 liblzma: Fix a warning in index.c.
src/liblzma/common/index.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) commit a24518971cc621315af142dd3bb7614fab04ad27 Author: Lasse Collin Date: 2015-02-26 20:46:14 +0200 Build: Fix a CR+LF problem when running autoreconf -fi on OS/2. build-aux/version.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit dec11497a71518423b5ff0e759100cf8aadf6c7b Author: Lasse Collin Date: 2015-02-26 16:53:44 +0200 Bump version and soname for 5.2.1. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 29e39c79975ab89ee5dd671e97064534a9f3a649 Author: Lasse Collin Date: 2015-02-26 13:01:09 +0200 Update NEWS for 5.2.1. NEWS | 14 ++++++++++++++ 1 file changed, 14 insertions(+) commit 7a11c4a8e5e15f13d5fa59233b3172e65428efdd Author: Lasse Collin Date: 2015-02-22 19:38:48 +0200 xz: Use pipe2() if available. configure.ac | 4 ++-- src/xz/file_io.c | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) commit 117d962685c72682c63edc9bb765367189800202 Author: Lasse Collin Date: 2015-02-21 23:40:26 +0200 liblzma: Fix a compression-ratio regression in LZMA1/2 in fast mode. The bug was added in the commit f48fce093b07aeda95c18850f5e086d9f2383380 and thus affected 5.1.4beta and 5.2.0. Luckily the bug cannot cause data corruption or other nasty things. src/liblzma/lzma/lzma_encoder_optimum_fast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit ae984e31c167d3bc52972ec422dd1ebd5f5d5719 Author: Lasse Collin Date: 2015-02-21 23:00:19 +0200 xz: Fix the fcntl() usage when creating a pipe for the self-pipe trick. Now it reads the old flags instead of blindly setting O_NONBLOCK. The old code may have worked correctly, but this is better. src/xz/file_io.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) commit 2205bb5853098aea36a56df6f5747037175f66b4 Author: Lasse Collin Date: 2015-02-10 15:29:34 +0200 Update THANKS. 
THANKS | 1 + 1 file changed, 1 insertion(+) commit d935b0cdf3db440269b9d952b2b281b18f8c7b08 Author: Lasse Collin Date: 2015-02-10 15:28:30 +0200 tuklib_cpucores: Use cpuset_getaffinity() on FreeBSD if available. In FreeBSD, cpuset_getaffinity() is the preferred way to get the number of available cores. Thanks to Rui Paulo for the patch. I edited it slightly, but hopefully I didn't break anything. m4/tuklib_cpucores.m4 | 23 ++++++++++++++++++++++- src/common/tuklib_cpucores.c | 18 ++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) commit eb61bc58c20769cac4d05f363b9c0e8c9c71a560 Author: Lasse Collin Date: 2015-02-09 22:08:37 +0200 xzdiff: Make the mktemp usage compatible with FreeBSD's mktemp. Thanks to Rui Paulo for the fix. src/scripts/xzdiff.in | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) commit b9a5b6b7a29029680af733082b6a46e0fc01623a Author: Lasse Collin Date: 2015-02-03 21:45:53 +0200 Add a few casts to tuklib_integer.h to silence possible warnings. I heard that Visual Studio 2013 gave warnings without the casts. Thanks to Gabi Davar. src/common/tuklib_integer.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) commit c45757135f40e4a0de730ba5fff0100219493982 Author: Lasse Collin Date: 2015-01-26 21:24:39 +0200 liblzma: Set LZMA_MEMCMPLEN_EXTRA depending on the compare method. src/liblzma/common/memcmplen.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) commit 3c500174ed5485f550972a2a6109c361e875f069 Author: Lasse Collin Date: 2015-01-26 20:40:16 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit fec88d41e672d9e197c9442aecf02bd0dfa6d516 Author: Lasse Collin Date: 2015-01-26 20:39:28 +0200 liblzma: Silence harmless Valgrind errors. Thanks to Torsten Rupp for reporting this. I had forgotten to run Valgrind before the 5.2.0 release. 
src/liblzma/lz/lz_encoder.c | 6 ++++++ 1 file changed, 6 insertions(+) commit a9b45badfec0928d20a27c7176c005fa637f7d1e Author: Lasse Collin Date: 2015-01-09 21:50:19 +0200 xz: Fix comments. src/xz/file_io.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) commit 541aee6dd4aa97a809aba281475a21b641bb89e2 Author: Lasse Collin Date: 2015-01-09 21:35:06 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 4170edc914655310d2363baccf5e615e09b04911 Author: Lasse Collin Date: 2015-01-09 21:34:06 +0200 xz: Don't fail if stdout doesn't support O_NONBLOCK. This is similar to the case with stdin. Thanks to Brad Smith for the bug report and testing on OpenBSD. src/xz/file_io.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) commit 04bbc0c2843c50c8ad1cba42b937118e38b0508d Author: Lasse Collin Date: 2015-01-07 19:18:20 +0200 xz: Fix a memory leak in DOS-specific code. src/xz/file_io.c | 2 ++ 1 file changed, 2 insertions(+) commit f0f1f6c7235ffa901cf76fe18e33749e200b3eea Author: Lasse Collin Date: 2015-01-07 19:08:06 +0200 xz: Don't fail if stdin doesn't support O_NONBLOCK. It's a problem at least on OpenBSD which doesn't support O_NONBLOCK on e.g. /dev/null. I'm not surprised if it's a problem on other OSes too since this behavior is allowed in POSIX-1.2008. The code relying on this behavior was committed in June 2013 and included in 5.1.3alpha released on 2013-10-26. Clearly the development releases only get limited testing. src/xz/file_io.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) commit d2d484647d9d9d679f03c75abb0404f67069271c Author: Lasse Collin Date: 2015-01-06 20:30:15 +0200 Tests: Don't hide unexpected error messages in test_files.sh. Hiding them makes no sense since normally there's no error when testing the "good" files. With "bad" files errors are expected and then it makes sense to keep the messages hidden. 
tests/test_files.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit aae6a6aeda51cf94a47e39ad624728f9bee75e30 Author: Lasse Collin Date: 2014-12-30 11:17:16 +0200 Update Solaris notes in INSTALL. Mention the possible "make check" failure on Solaris in the Solaris-specific section of INSTALL. It was already in section 4.5 but it is better mention it in the OS-specific section too. INSTALL | 4 ++++ 1 file changed, 4 insertions(+) commit 7815112153178800a3521b9f31960e7cdc26cfba Author: Lasse Collin Date: 2014-12-26 12:00:05 +0200 Build: POSIX shell isn't required if scripts are disabled. INSTALL | 3 ++- configure.ac | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) commit a0cd05ee71d330b79ead6eb9222e1b24e1559d3a Author: Lasse Collin Date: 2014-12-21 20:48:37 +0200 DOS: Update Makefile. dos/Makefile | 1 + 1 file changed, 1 insertion(+) commit b85ee0905ec4ab7656d22e63519fdd3bedb21f2e Author: Lasse Collin Date: 2014-12-21 19:50:38 +0200 Windows: Fix bin_i486 to bin_i686 in build.bash. windows/build.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit cbafa710918195dbba3db02c3fab4f0538235206 Author: Lasse Collin Date: 2014-12-21 18:58:44 +0200 Docs: Use lzma_cputhreads() in 04_compress_easy_mt.c. doc/examples/04_compress_easy_mt.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) commit 8dbb57238d372c7263cfeb3e7f7fd9a73173156a Author: Lasse Collin Date: 2014-12-21 18:56:44 +0200 Docs: Update docs/examples/00_README.txt. doc/examples/00_README.txt | 4 ++++ 1 file changed, 4 insertions(+) commit 6060f7dc76fd6c2a8a1f8e85d0e4d86bb78273e6 Author: Lasse Collin Date: 2014-12-21 18:11:17 +0200 Bump version and soname for 5.2.0. I know that soname != app version, but I skip AGE=1 in -version-info to make the soname match the liblzma version anyway. It doesn't hurt anything as long as it doesn't conflict with library versioning rules. 
src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 6 +++--- src/liblzma/liblzma.map | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/contrib/xz/README b/contrib/xz/README index ed1cace194f8..9e76301b95cd 100644 --- a/contrib/xz/README +++ b/contrib/xz/README @@ -1,303 +1,307 @@ XZ Utils ======== 0. Overview 1. Documentation 1.1. Overall documentation 1.2. Documentation for command-line tools 1.3. Documentation for liblzma 2. Version numbering 3. Reporting bugs 4. Translations 5. Other implementations of the .xz format 6. Contact information 0. Overview ----------- XZ Utils provide a general-purpose data-compression library plus command-line tools. The native file format is the .xz format, but also the legacy .lzma format is supported. The .xz format supports multiple compression algorithms, which are called "filters" in the context of XZ Utils. The primary filter is currently LZMA2. With typical files, XZ Utils create about 30 % smaller files than gzip. To ease adapting support for the .xz format into existing applications and scripts, the API of liblzma is somewhat similar to the API of the popular zlib library. For the same reason, the command-line tool xz has a command-line syntax similar to that of gzip. When aiming for the highest compression ratio, the LZMA2 encoder uses a lot of CPU time and may use, depending on the settings, even hundreds of megabytes of RAM. However, in fast modes, the LZMA2 encoder competes with bzip2 in compression speed, RAM usage, and compression ratio. LZMA2 is reasonably fast to decompress. It is a little slower than gzip, but a lot faster than bzip2. Being fast to decompress means that the .xz format is especially nice when the same file will be decompressed very many times (usually on different computers), which is the case e.g. when distributing software packages. In such situations, it's not too bad if the compression takes some time, since that needs to be done only once to benefit many people. 
With some file types, combining (or "chaining") LZMA2 with an additional filter can improve the compression ratio. A filter chain may contain up to four filters, although usually only one or two are used. For example, putting a BCJ (Branch/Call/Jump) filter before LZMA2 in the filter chain can improve compression ratio of executable files. Since the .xz format allows adding new filter IDs, it is possible that some day there will be a filter that is, for example, much faster to compress than LZMA2 (but probably with worse compression ratio). Similarly, it is possible that some day there is a filter that will compress better than LZMA2. XZ Utils supports multithreaded compression. XZ Utils doesn't support multithreaded decompression yet. It has been planned though and taken into account when designing the .xz file format. In the future, files that were created in threaded mode can be decompressed in threaded mode too. 1. Documentation ---------------- 1.1. Overall documentation README This file INSTALL.generic Generic install instructions for those not familiar with packages using GNU Autotools INSTALL Installation instructions specific to XZ Utils PACKAGERS Information to packagers of XZ Utils COPYING XZ Utils copyright and license information COPYING.GPLv2 GNU General Public License version 2 COPYING.GPLv3 GNU General Public License version 3 COPYING.LGPLv2.1 GNU Lesser General Public License version 2.1 AUTHORS The main authors of XZ Utils THANKS Incomplete list of people who have helped making this software NEWS User-visible changes between XZ Utils releases ChangeLog Detailed list of changes (commit log) TODO Known bugs and some sort of to-do list Note that only some of the above files are included in binary packages. 1.2. Documentation for command-line tools The command-line tools are documented as man pages. 
In source code releases (and possibly also in some binary packages), the man pages are also provided in plain text (ASCII only) and PDF formats in the directory "doc/man" to make the man pages more accessible to those whose operating system doesn't provide an easy way to view man pages. 1.3. Documentation for liblzma The liblzma API headers include short docs about each function and data type as Doxygen tags. These docs should be quite OK as a quick reference. There are a few example/tutorial programs that should help in getting started with liblzma. In the source package the examples are in "doc/examples" and in binary packages they may be under "examples" in the same directory as this README. Since the liblzma API has similarities to the zlib API, some people may find it useful to read the zlib docs and tutorial too: https://zlib.net/manual.html https://zlib.net/zlib_how.html 2. Version numbering -------------------- The version number format of XZ Utils is X.Y.ZS: - X is the major version. When this is incremented, the library API and ABI break. - Y is the minor version. It is incremented when new features are added without breaking the existing API or ABI. An even Y indicates a stable release and an odd Y indicates unstable (alpha or beta version). - Z is the revision. This has a different meaning for stable and unstable releases: * Stable: Z is incremented when bugs get fixed without adding any new features. This is intended to be convenient for downstream distributors that want bug fixes but don't want any new features to minimize the risk of introducing new bugs. * Unstable: Z is just a counter. API or ABI of features added in earlier unstable releases having the same X.Y may break. - S indicates stability of the release. It is missing from the stable releases, where Y is an even number. When Y is odd, S is either "alpha" or "beta" to make it very clear that such versions are not stable releases. 
The same X.Y.Z combination is not used for more than one stability level, i.e. after X.Y.Zalpha, the next version can be X.Y.(Z+1)beta but not X.Y.Zbeta. 3. Reporting bugs ----------------- Naturally it is easiest for me if you already know what causes the unexpected behavior. Even better if you have a patch to propose. However, quite often the reason for unexpected behavior is unknown, so here are a few things to do before sending a bug report: 1. Try to create a small example how to reproduce the issue. 2. Compile XZ Utils with debugging code using configure switches --enable-debug and, if possible, --disable-shared. If you are using GCC, use CFLAGS='-O0 -ggdb3'. Don't strip the resulting binaries. 3. Turn on core dumps. The exact command depends on your shell; for example in GNU bash it is done with "ulimit -c unlimited", and in tcsh with "limit coredumpsize unlimited". 4. Try to reproduce the suspected bug. If you get "assertion failed" message, be sure to include the complete message in your bug report. If the application leaves a coredump, get a backtrace using gdb: $ gdb /path/to/app-binary # Load the app to the debugger. (gdb) core core # Open the coredump. (gdb) bt # Print the backtrace. Copy & paste to bug report. (gdb) quit # Quit gdb. Report your bug via email or IRC (see Contact information below). Don't send core dump files or any executables. If you have a small example file(s) (total size less than 256 KiB), please include it/them as an attachment. If you have bigger test files, put them online somewhere and include a URL to the file(s) in the bug report. Always include the exact version number of XZ Utils in the bug report. If you are using a snapshot from the git repository, use "git describe" to get the exact snapshot version. 
If you are using XZ Utils shipped in an operating system distribution, mention the distribution name, distribution version, and exact xz package version; if you cannot repeat the bug with the code compiled from unpatched source code, you probably need to report a bug to your distribution's bug tracking system. 4. Translations --------------- The xz command line tool and all man pages can be translated. The translations are handled via the Translation Project. If you wish to help translating xz, please join the Translation Project: https://translationproject.org/html/translators.html Below are notes and testing instructions specific to xz translations. Testing can be done by installing xz into a temporary directory: ./configure --disable-shared --prefix=/tmp/xz-test # make -C po update-po make install bash debug/translation.bash | less bash debug/translation.bash | less -S # For --list outputs Repeat the above as needed (no need to re-run configure though). Note especially the following: - The output of --help and --long-help must look nice on an 80-column terminal. It's OK to add extra lines if needed. - In contrast, don't add extra lines to error messages and such. They are often preceded with e.g. a filename on the same line, so you have no way to predict where to put a \n. Let the terminal do the wrapping even if it looks ugly. Adding new lines will be even uglier in the generic case even if it looks nice in a few limited examples. - Be careful with column alignment in tables and table-like output (--list, --list --verbose --verbose, --info-memory, --help, and --long-help): * All descriptions of options in --help should start in the same column (but it doesn't need to be the same column as in the English messages; just be consistent if you change it). Check that both --help and --long-help look OK, since they share several strings. * --list --verbose and --info-memory print lines that have the format "Description: %s". 
If you need a longer description, you can put extra space between the colon and %s. Then you may need to add extra space to other strings too so that the result as a whole looks good (all values start at the same column). * The columns of the actual tables in --list --verbose --verbose should be aligned properly. Abbreviate if necessary. It might be good to keep at least 2 or 3 spaces between column headings and avoid spaces in the headings so that the columns stand out better, but this is a matter of opinion. Do what you think looks best. - Be careful to put a period at the end of a sentence when the original version has it, and don't put it when the original doesn't have it. Similarly, be careful with \n characters at the beginning and end of the strings. - Read the TRANSLATORS comments that have been extracted from the source code and included in xz.pot. Some comments suggest testing with a specific command which needs an .xz file. You may use e.g. any tests/files/good-*.xz. However, these test commands are included in the debug/translation.bash output, so reading that output carefully can be enough. - If you find language problems in the original English strings, feel free to suggest improvements. Ask if something is unclear. - The translated messages should be understandable (sometimes this may be a problem with the original English messages too). Don't make a direct word-by-word translation from English especially if the result doesn't sound good in your language. Thanks for your help! 5. Other implementations of the .xz format ------------------------------------------ 7-Zip and the p7zip port of 7-Zip support the .xz format starting from the version 9.00alpha. https://7-zip.org/ https://p7zip.sourceforge.net/ XZ Embedded is a limited implementation written for use in the Linux kernel, but it is also suitable for other embedded use. https://tukaani.org/xz/embedded.html + XZ for Java is a complete implementation written in pure Java.
+ + https://tukaani.org/xz/java.html + 6. Contact information ---------------------- If you have questions, bug reports, patches etc. related to XZ Utils, the project maintainers Lasse Collin and Jia Tan can be reached via . You might find Lasse also from #tukaani on Libera Chat (IRC). The nick is Larhzu. The channel tends to be pretty quiet, so just ask your question and someone might wake up. diff --git a/contrib/xz/THANKS b/contrib/xz/THANKS index e28d3dc8621b..cf7c59c844c5 100644 --- a/contrib/xz/THANKS +++ b/contrib/xz/THANKS @@ -1,167 +1,170 @@ Thanks ====== Some people have helped more, some less, but nevertheless everyone's help has been important. :-) In alphabetical order: - Mark Adler - H. Peter Anvin - Jeff Bastian - Nelson H. F. Beebe - Karl Beldan - Karl Berry - Anders F. Björklund - Emmanuel Blot - Melanie Blower - Alexander Bluhm - Martin Blumenstingl - Ben Boeckel - Jakub Bogusz - Adam Borowski - Maarten Bosmans + - Benjamin Buch - Trent W. Buck - Kevin R. Bulgrien - James Buren - David Burklund - Frank Busse - Daniel Mealha Cabrita - Milo Casagrande - Marek Černocký - Tomer Chachamu - Vitaly Chikunov - Antoine Cœur - Gabi Davar - İhsan Doğan - Chris Donawa - Andrew Dudman - Markus Duft - İsmail Dönmez - Paul Eggert - Robert Elz - Gilles Espinasse - Denis Excoffier - Vincent Fazio - Michael Felt - Michael Fox - Mike Frysinger - Daniel Richard G. 
- Tomasz Gajc - Bjarni Ingi Gislason - John Paul Adrian Glaubitz - Bill Glessner - Michał Górny - Jason Gorski - Juan Manuel Guerrero - Diederik de Haas - Joachim Henke - Christian Hesse - Vincenzo Innocente - Peter Ivanov - Nicholas Jackson - Sam James - Hajin Jang - Jouk Jansen - Jun I Jin - Kiyoshi Kanazawa - Per Øyvind Karlsen - Iouri Kharon - Thomas Klausner - Richard Koch - Anton Kochkov - Ville Koskinen - Marcin Kowalczyk - Jan Kratochvil - Christian Kujau - Stephan Kulow - Ilya Kurdyukov - Peter Lawler - James M Leddy - Vincent Lefevre - Hin-Tak Leung - Andraž 'ruskie' Levstik - Cary Lewis - Wim Lewis - Xin Li - Eric Lindblad - Lorenzo De Liso - H.J. Lu - Bela Lubkin - Gregory Margo - Julien Marrec - Ed Maste - Martin Matuška - Ivan A. Melnikov - Jim Meyering - Arkadiusz Miskiewicz - Nathan Moinvaziri - Étienne Mollier - Conley Moorhous - Rafał Mużyło - Adrien Nader - Evan Nemerson - Hongbo Ni - Jonathan Nieder - Andre Noll - Peter O'Gorman + - Dimitri Papadopoulos Orfanos - Daniel Packard - Filip Palian - Peter Pallinger - Rui Paulo - Igor Pavlov - Diego Elio Pettenò - Elbert Pol - Mikko Pouru - Rich Prohaska - Trần Ngọc Quân - Pavel Raiskup - Ole André Vadla Ravnås - Eric S. Raymond - Robert Readman - Bernhard Reutner-Fischer - Markus Rickert - Cristian Rodríguez - Christian von Roques - Boud Roukema - Torsten Rupp - Stephen Sachs - Jukka Salmi - Alexandre Sauvé - Benno Schulenberg - Andreas Schwab - Bhargava Shastry - Dan Shechter - Stuart Shelton - Sebastian Andrzej Siewior - Ville Skyttä - Brad Smith - Bruce Stark - Pippijn van Steenhoven - Jonathan Stott - Dan Stromberg - Jia Tan - Vincent Torri - Paul Townsend - Mohammed Adnène Trojette - Alexey Tourbin + - Taiki Tsunekawa - Loganaden Velvindron - Patrick J. 
Volkerding - Martin Väth - Adam Walling - Jeffrey Walton - Christian Weisgerber - Dan Weiss - Bert Wesarg - Fredrik Wikstrom - Jim Wilcoxson - Ralf Wildenhues - Charles Wilson - Lars Wirzenius - Pilorz Wojciech - Ryan Young - Andreas Zieringer Also thanks to all the people who have participated in the Tukaani project. I have probably forgot to add some names to the above list. Sorry about that and thanks for your help. diff --git a/contrib/xz/src/common/mythread.h b/contrib/xz/src/common/mythread.h index 7a80f966e31f..1cce50e9fd85 100644 --- a/contrib/xz/src/common/mythread.h +++ b/contrib/xz/src/common/mythread.h @@ -1,528 +1,528 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file mythread.h /// \brief Some threading related helper macros and functions // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #ifndef MYTHREAD_H #define MYTHREAD_H #include "sysdefs.h" // If any type of threading is enabled, #define MYTHREAD_ENABLED. #if defined(MYTHREAD_POSIX) || defined(MYTHREAD_WIN95) \ || defined(MYTHREAD_VISTA) # define MYTHREAD_ENABLED 1 #endif #ifdef MYTHREAD_ENABLED //////////////////////////////////////// // Shared between all threading types // //////////////////////////////////////// // Locks a mutex for a duration of a block. // // Perform mythread_mutex_lock(&mutex) in the beginning of a block // and mythread_mutex_unlock(&mutex) at the end of the block. "break" // may be used to unlock the mutex and jump out of the block. // mythread_sync blocks may be nested. // // Example: // // mythread_sync(mutex) { // foo(); // if (some_error) // break; // Skips bar() // bar(); // } // // At least GCC optimizes the loops completely away so it doesn't slow // things down at all compared to plain mythread_mutex_lock(&mutex) // and mythread_mutex_unlock(&mutex) calls. 
// #define mythread_sync(mutex) mythread_sync_helper1(mutex, __LINE__) #define mythread_sync_helper1(mutex, line) mythread_sync_helper2(mutex, line) #define mythread_sync_helper2(mutex, line) \ for (unsigned int mythread_i_ ## line = 0; \ mythread_i_ ## line \ ? (mythread_mutex_unlock(&(mutex)), 0) \ : (mythread_mutex_lock(&(mutex)), 1); \ mythread_i_ ## line = 1) \ for (unsigned int mythread_j_ ## line = 0; \ !mythread_j_ ## line; \ mythread_j_ ## line = 1) #endif #if !defined(MYTHREAD_ENABLED) ////////////////// // No threading // ////////////////// // Calls the given function once. This isn't thread safe. #define mythread_once(func) \ do { \ static bool once_ = false; \ if (!once_) { \ func(); \ once_ = true; \ } \ } while (0) -#if !(defined(_WIN32) && !defined(__CYGWIN__)) +#if !(defined(_WIN32) && !defined(__CYGWIN__)) && !defined(__wasm__) // Use sigprocmask() to set the signal mask in single-threaded programs. #include static inline void mythread_sigmask(int how, const sigset_t *restrict set, sigset_t *restrict oset) { int ret = sigprocmask(how, set, oset); assert(ret == 0); (void)ret; } #endif #elif defined(MYTHREAD_POSIX) //////////////////// // Using pthreads // //////////////////// #include #include #include #include // If clock_gettime() isn't available, use gettimeofday() from // as a fallback. gettimeofday() is in SUSv2 and thus is supported on all // relevant POSIX systems. #ifndef HAVE_CLOCK_GETTIME # include #endif #define MYTHREAD_RET_TYPE void * #define MYTHREAD_RET_VALUE NULL typedef pthread_t mythread; typedef pthread_mutex_t mythread_mutex; typedef struct { pthread_cond_t cond; #ifdef HAVE_CLOCK_GETTIME // Clock ID (CLOCK_REALTIME or CLOCK_MONOTONIC) associated with // the condition variable. clockid_t clk_id; #endif } mythread_cond; typedef struct timespec mythread_condtime; // Calls the given function once in a thread-safe way. 
#define mythread_once(func) \ do { \ static pthread_once_t once_ = PTHREAD_ONCE_INIT; \ pthread_once(&once_, &func); \ } while (0) // Use pthread_sigmask() to set the signal mask in multi-threaded programs. // Do nothing on OpenVMS since it lacks pthread_sigmask(). static inline void mythread_sigmask(int how, const sigset_t *restrict set, sigset_t *restrict oset) { #ifdef __VMS (void)how; (void)set; (void)oset; #else int ret = pthread_sigmask(how, set, oset); assert(ret == 0); (void)ret; #endif } // Creates a new thread with all signals blocked. Returns zero on success // and non-zero on error. static inline int mythread_create(mythread *thread, void *(*func)(void *arg), void *arg) { sigset_t old; sigset_t all; sigfillset(&all); mythread_sigmask(SIG_SETMASK, &all, &old); const int ret = pthread_create(thread, NULL, func, arg); mythread_sigmask(SIG_SETMASK, &old, NULL); return ret; } // Joins a thread. Returns zero on success and non-zero on error. static inline int mythread_join(mythread thread) { return pthread_join(thread, NULL); } // Initializes a mutex. Returns zero on success and non-zero on error. static inline int mythread_mutex_init(mythread_mutex *mutex) { return pthread_mutex_init(mutex, NULL); } static inline void mythread_mutex_destroy(mythread_mutex *mutex) { int ret = pthread_mutex_destroy(mutex); assert(ret == 0); (void)ret; } static inline void mythread_mutex_lock(mythread_mutex *mutex) { int ret = pthread_mutex_lock(mutex); assert(ret == 0); (void)ret; } static inline void mythread_mutex_unlock(mythread_mutex *mutex) { int ret = pthread_mutex_unlock(mutex); assert(ret == 0); (void)ret; } // Initializes a condition variable. // // Using CLOCK_MONOTONIC instead of the default CLOCK_REALTIME makes the // timeout in pthread_cond_timedwait() work correctly also if system time // is suddenly changed.
Unfortunately CLOCK_MONOTONIC isn't available // everywhere while the default CLOCK_REALTIME is, so the default is // used if CLOCK_MONOTONIC isn't available. // // If clock_gettime() isn't available at all, gettimeofday() will be used. static inline int mythread_cond_init(mythread_cond *mycond) { #ifdef HAVE_CLOCK_GETTIME # if defined(HAVE_PTHREAD_CONDATTR_SETCLOCK) && \ defined(HAVE_CLOCK_MONOTONIC) struct timespec ts; pthread_condattr_t condattr; // POSIX doesn't seem to *require* that pthread_condattr_setclock() // will fail if given an unsupported clock ID. Test that // CLOCK_MONOTONIC really is supported using clock_gettime(). if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0 && pthread_condattr_init(&condattr) == 0) { int ret = pthread_condattr_setclock( &condattr, CLOCK_MONOTONIC); if (ret == 0) ret = pthread_cond_init(&mycond->cond, &condattr); pthread_condattr_destroy(&condattr); if (ret == 0) { mycond->clk_id = CLOCK_MONOTONIC; return 0; } } // If anything above fails, fall back to the default CLOCK_REALTIME. // POSIX requires that all implementations of clock_gettime() must // support at least CLOCK_REALTIME. # endif mycond->clk_id = CLOCK_REALTIME; #endif return pthread_cond_init(&mycond->cond, NULL); } static inline void mythread_cond_destroy(mythread_cond *cond) { int ret = pthread_cond_destroy(&cond->cond); assert(ret == 0); (void)ret; } static inline void mythread_cond_signal(mythread_cond *cond) { int ret = pthread_cond_signal(&cond->cond); assert(ret == 0); (void)ret; } static inline void mythread_cond_wait(mythread_cond *cond, mythread_mutex *mutex) { int ret = pthread_cond_wait(&cond->cond, mutex); assert(ret == 0); (void)ret; } // Waits on a condition or until a timeout expires. If the timeout expires, // non-zero is returned, otherwise zero is returned. 
static inline int mythread_cond_timedwait(mythread_cond *cond, mythread_mutex *mutex, const mythread_condtime *condtime) { int ret = pthread_cond_timedwait(&cond->cond, mutex, condtime); assert(ret == 0 || ret == ETIMEDOUT); return ret; } // Sets condtime to the absolute time that is timeout_ms milliseconds // in the future. The type of the clock to use is taken from cond. static inline void mythread_condtime_set(mythread_condtime *condtime, const mythread_cond *cond, uint32_t timeout_ms) { condtime->tv_sec = (time_t)(timeout_ms / 1000); condtime->tv_nsec = (long)((timeout_ms % 1000) * 1000000); #ifdef HAVE_CLOCK_GETTIME struct timespec now; int ret = clock_gettime(cond->clk_id, &now); assert(ret == 0); (void)ret; condtime->tv_sec += now.tv_sec; condtime->tv_nsec += now.tv_nsec; #else (void)cond; struct timeval now; gettimeofday(&now, NULL); condtime->tv_sec += now.tv_sec; condtime->tv_nsec += now.tv_usec * 1000L; #endif // tv_nsec must stay in the range [0, 999_999_999]. if (condtime->tv_nsec >= 1000000000L) { condtime->tv_nsec -= 1000000000L; ++condtime->tv_sec; } } #elif defined(MYTHREAD_WIN95) || defined(MYTHREAD_VISTA) ///////////////////// // Windows threads // ///////////////////// #define WIN32_LEAN_AND_MEAN #ifdef MYTHREAD_VISTA # undef _WIN32_WINNT # define _WIN32_WINNT 0x0600 #endif #include #include #define MYTHREAD_RET_TYPE unsigned int __stdcall #define MYTHREAD_RET_VALUE 0 typedef HANDLE mythread; typedef CRITICAL_SECTION mythread_mutex; #ifdef MYTHREAD_WIN95 typedef HANDLE mythread_cond; #else typedef CONDITION_VARIABLE mythread_cond; #endif typedef struct { // Tick count (milliseconds) in the beginning of the timeout. // NOTE: This is 32 bits so it wraps around after 49.7 days. // Multi-day timeouts may not work as expected. DWORD start; // Length of the timeout in milliseconds. The timeout expires // when the current tick count minus "start" is equal or greater // than "timeout". 
DWORD timeout; } mythread_condtime; // mythread_once() is only available with Vista threads. #ifdef MYTHREAD_VISTA #define mythread_once(func) \ do { \ static INIT_ONCE once_ = INIT_ONCE_STATIC_INIT; \ BOOL pending_; \ if (!InitOnceBeginInitialize(&once_, 0, &pending_, NULL)) \ abort(); \ if (pending_) { \ func(); \ if (!InitOnceComplete(&once, 0, NULL)) \ abort(); \ } \ } while (0) #endif // mythread_sigmask() isn't available on Windows. Even a dummy version would // make no sense because the other POSIX signal functions are missing anyway. static inline int mythread_create(mythread *thread, unsigned int (__stdcall *func)(void *arg), void *arg) { uintptr_t ret = _beginthreadex(NULL, 0, func, arg, 0, NULL); if (ret == 0) return -1; *thread = (HANDLE)ret; return 0; } static inline int mythread_join(mythread thread) { int ret = 0; if (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0) ret = -1; if (!CloseHandle(thread)) ret = -1; return ret; } static inline int mythread_mutex_init(mythread_mutex *mutex) { InitializeCriticalSection(mutex); return 0; } static inline void mythread_mutex_destroy(mythread_mutex *mutex) { DeleteCriticalSection(mutex); } static inline void mythread_mutex_lock(mythread_mutex *mutex) { EnterCriticalSection(mutex); } static inline void mythread_mutex_unlock(mythread_mutex *mutex) { LeaveCriticalSection(mutex); } static inline int mythread_cond_init(mythread_cond *cond) { #ifdef MYTHREAD_WIN95 *cond = CreateEvent(NULL, FALSE, FALSE, NULL); return *cond == NULL ? 
-1 : 0; #else InitializeConditionVariable(cond); return 0; #endif } static inline void mythread_cond_destroy(mythread_cond *cond) { #ifdef MYTHREAD_WIN95 CloseHandle(*cond); #else (void)cond; #endif } static inline void mythread_cond_signal(mythread_cond *cond) { #ifdef MYTHREAD_WIN95 SetEvent(*cond); #else WakeConditionVariable(cond); #endif } static inline void mythread_cond_wait(mythread_cond *cond, mythread_mutex *mutex) { #ifdef MYTHREAD_WIN95 LeaveCriticalSection(mutex); WaitForSingleObject(*cond, INFINITE); EnterCriticalSection(mutex); #else BOOL ret = SleepConditionVariableCS(cond, mutex, INFINITE); assert(ret); (void)ret; #endif } static inline int mythread_cond_timedwait(mythread_cond *cond, mythread_mutex *mutex, const mythread_condtime *condtime) { #ifdef MYTHREAD_WIN95 LeaveCriticalSection(mutex); #endif DWORD elapsed = GetTickCount() - condtime->start; DWORD timeout = elapsed >= condtime->timeout ? 0 : condtime->timeout - elapsed; #ifdef MYTHREAD_WIN95 DWORD ret = WaitForSingleObject(*cond, timeout); assert(ret == WAIT_OBJECT_0 || ret == WAIT_TIMEOUT); EnterCriticalSection(mutex); return ret == WAIT_TIMEOUT; #else BOOL ret = SleepConditionVariableCS(cond, mutex, timeout); assert(ret || GetLastError() == ERROR_TIMEOUT); return !ret; #endif } static inline void mythread_condtime_set(mythread_condtime *condtime, const mythread_cond *cond, uint32_t timeout) { (void)cond; condtime->start = GetTickCount(); condtime->timeout = timeout; } #endif #endif diff --git a/contrib/xz/src/liblzma/api/lzma/container.h b/contrib/xz/src/liblzma/api/lzma/container.h index 48a643651f3d..2849fbfd3c51 100644 --- a/contrib/xz/src/liblzma/api/lzma/container.h +++ b/contrib/xz/src/liblzma/api/lzma/container.h @@ -1,967 +1,968 @@ /** * \file lzma/container.h * \brief File formats * \note Never include this file directly. Use instead. */ /* * Author: Lasse Collin * * This file has been put into the public domain. * You can do whatever you want with this file. 
*/ #ifndef LZMA_H_INTERNAL # error Never include this file directly. Use <lzma.h> instead. #endif /************ * Encoding * ************/ /** * \brief Default compression preset * * It's not straightforward to recommend a default preset, because in some * cases keeping the resource usage relatively low is more important than * getting the maximum compression ratio. */ #define LZMA_PRESET_DEFAULT UINT32_C(6) /** * \brief Mask for preset level * * This is useful only if you need to extract the level from the preset * variable. That should be rare. */ #define LZMA_PRESET_LEVEL_MASK UINT32_C(0x1F) /* * Preset flags * * Currently only one flag is defined. */ /** * \brief Extreme compression preset * * This flag modifies the preset to make the encoding significantly slower * while improving the compression ratio only marginally. This is useful * when you don't mind spending time to get as small a result as possible. * * This flag doesn't affect the memory usage requirements of the decoder (at * least not significantly). The memory usage of the encoder may be increased * a little but only at the lowest preset levels (0-3). */ #define LZMA_PRESET_EXTREME (UINT32_C(1) << 31) /** * \brief Multithreading options */ typedef struct { /** * \brief Flags * * Set this to zero if no flags are wanted. * * Encoder: No flags are currently supported. * * Decoder: Bitwise-or of zero or more of the decoder flags: * - LZMA_TELL_NO_CHECK * - LZMA_TELL_UNSUPPORTED_CHECK * - LZMA_TELL_ANY_CHECK * - LZMA_IGNORE_CHECK * - LZMA_CONCATENATED * - LZMA_FAIL_FAST */ uint32_t flags; /** * \brief Number of worker threads to use */ uint32_t threads; /** * \brief Encoder only: Maximum uncompressed size of a Block * * The encoder will start a new .xz Block every block_size bytes. * Using LZMA_FULL_FLUSH or LZMA_FULL_BARRIER with lzma_code() * the caller may tell liblzma to start a new Block earlier. * * With LZMA2, a recommended block size is 2-4 times the LZMA2 * dictionary size.
With very small dictionaries, it is recommended * to use a block size of at least 1 MiB for a good compression ratio, even * if this is more than four times the dictionary size. Note that * these are only recommendations for typical use cases; feel free * to use other values. Just keep in mind that using a block size * less than the LZMA2 dictionary size is a waste of RAM. * * Set this to 0 to let liblzma choose the block size depending * on the compression options. For LZMA2 it will be 3*dict_size * or 1 MiB, whichever is more. * * For each thread, about 3 * block_size bytes of memory will be * allocated. This may change in later liblzma versions. If so, * the memory usage will probably be reduced, not increased. */ uint64_t block_size; /** * \brief Timeout to allow lzma_code() to return early * * Multithreading can make liblzma consume input and produce * output in a very bursty way: it may first read a lot of input * to fill internal buffers, then no input or output occurs for * a while. * * In single-threaded mode, lzma_code() won't return until it has * either consumed all the input or filled the output buffer. If * this is done in multithreaded mode, it may cause a call to * lzma_code() to take even tens of seconds, which isn't acceptable * in all applications. * * To avoid very long blocking times in lzma_code(), a timeout * (in milliseconds) may be set here. If lzma_code() would block * longer than this number of milliseconds, it will return with * LZMA_OK. Reasonable values are 100 ms or more. The xz command * line tool uses 300 ms. * * If long blocking times are acceptable, set timeout to a special * value of 0. This will disable the timeout mechanism and will make * lzma_code() block until all the input is consumed or the output * buffer has been filled. * * \note Even with a timeout, lzma_code() might sometimes take * a long time to return. No timing guarantees are made.
*/ uint32_t timeout; /** * \brief Encoder only: Compression preset * * The preset is set just like with lzma_easy_encoder(). * The preset is ignored if filters below is non-NULL. */ uint32_t preset; /** * \brief Encoder only: Filter chain (alternative to a preset) * * If this is NULL, the preset above is used. Otherwise the preset * is ignored and the filter chain specified here is used. */ const lzma_filter *filters; /** * \brief Encoder only: Integrity check type * * See check.h for available checks. The xz command line tool * defaults to LZMA_CHECK_CRC64, which is a good choice if you * are unsure. */ lzma_check check; /* * Reserved space to allow possible future extensions without * breaking the ABI. You should not touch these, because the names * of these variables may change. These are and will never be used * with the currently supported options, so it is safe to leave these * uninitialized. */ /** \private Reserved member. */ lzma_reserved_enum reserved_enum1; /** \private Reserved member. */ lzma_reserved_enum reserved_enum2; /** \private Reserved member. */ lzma_reserved_enum reserved_enum3; /** \private Reserved member. */ uint32_t reserved_int1; /** \private Reserved member. */ uint32_t reserved_int2; /** \private Reserved member. */ uint32_t reserved_int3; /** \private Reserved member. */ uint32_t reserved_int4; /** * \brief Memory usage limit to reduce the number of threads * * Encoder: Ignored. * * Decoder: * * If the number of threads has been set so high that more than * memlimit_threading bytes of memory would be needed, the number * of threads will be reduced so that the memory usage will not exceed * memlimit_threading bytes. However, if memlimit_threading cannot * be met even in single-threaded mode, then decoding will continue * in single-threaded mode and memlimit_threading may be exceeded * even by a large amount. That is, memlimit_threading will never make * lzma_code() return LZMA_MEMLIMIT_ERROR. 
To truly cap the memory * usage, see memlimit_stop below. * * Setting memlimit_threading to UINT64_MAX or a similar huge value * means that liblzma is allowed to keep the whole compressed file * and the whole uncompressed file in memory in addition to the memory * needed by the decompressor data structures used by each thread! * In other words, a reasonable value limit must be set here or it * will cause problems sooner or later. If you have no idea what * a reasonable value could be, try lzma_physmem() / 4 as a starting * point. Setting this limit will never prevent decompression of * a file; this will only reduce the number of threads. * * If memlimit_threading is greater than memlimit_stop, then the value * of memlimit_stop will be used for both. */ uint64_t memlimit_threading; /** * \brief Memory usage limit that should never be exceeded * * Encoder: Ignored. * * Decoder: If decompressing will need more than this amount of * memory even in the single-threaded mode, then lzma_code() will * return LZMA_MEMLIMIT_ERROR. */ uint64_t memlimit_stop; /** \private Reserved member. */ uint64_t reserved_int7; /** \private Reserved member. */ uint64_t reserved_int8; /** \private Reserved member. */ void *reserved_ptr1; /** \private Reserved member. */ void *reserved_ptr2; /** \private Reserved member. */ void *reserved_ptr3; /** \private Reserved member. */ void *reserved_ptr4; } lzma_mt; /** * \brief Calculate approximate memory usage of easy encoder * * This function is a wrapper for lzma_raw_encoder_memusage(). * * \param preset Compression preset (level and possible flags) * * \return Number of bytes of memory required for the given * preset when encoding or UINT64_MAX on error. */ extern LZMA_API(uint64_t) lzma_easy_encoder_memusage(uint32_t preset) lzma_nothrow lzma_attr_pure; /** * \brief Calculate approximate decoder memory usage of a preset * * This function is a wrapper for lzma_raw_decoder_memusage(). 
* * \param preset Compression preset (level and possible flags) * * \return Number of bytes of memory required to decompress a file * that was compressed using the given preset or UINT64_MAX * on error. */ extern LZMA_API(uint64_t) lzma_easy_decoder_memusage(uint32_t preset) lzma_nothrow lzma_attr_pure; /** * \brief Initialize .xz Stream encoder using a preset number * * This function is intended for those who just want to use the basic features * of liblzma (that is, most developers out there). * * If initialization fails (return value is not LZMA_OK), all the memory * allocated for *strm by liblzma is always freed. Thus, there is no need * to call lzma_end() after failed initialization. * * If initialization succeeds, use lzma_code() to do the actual encoding. * Valid values for `action' (the second argument of lzma_code()) are * LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, and LZMA_FINISH. In the future, * there may be compression levels or flags that don't support LZMA_SYNC_FLUSH. * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. * \param preset Compression preset to use. A preset consists of a level * number and zero or more flags. Usually flags aren't * used, so preset is simply a number [0, 9] which matches * the options -0 ... -9 of the xz command line tool. * Additional flags can be set using bitwise-or with * the preset level number, e.g. 6 | LZMA_PRESET_EXTREME. * \param check Integrity check type to use. See check.h for available * checks. The xz command line tool defaults to * LZMA_CHECK_CRC64, which is a good choice if you are * unsure. LZMA_CHECK_CRC32 is good too as long as the * uncompressed file is not many gigabytes. * * \return Possible lzma_ret values: * - LZMA_OK: Initialization succeeded. Use lzma_code() to * encode your data. * - LZMA_MEM_ERROR: Memory allocation failed. * - LZMA_OPTIONS_ERROR: The given compression preset is not * supported by this build of liblzma.
* - LZMA_UNSUPPORTED_CHECK: The given check type is not * supported by this liblzma build. * - LZMA_PROG_ERROR: One or more of the parameters have values * that will never be valid. For example, strm == NULL. */ extern LZMA_API(lzma_ret) lzma_easy_encoder( lzma_stream *strm, uint32_t preset, lzma_check check) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Single-call .xz Stream encoding using a preset number * * The maximum required output buffer size can be calculated with * lzma_stream_buffer_bound(). * * \param preset Compression preset to use. See the description * in lzma_easy_encoder(). * \param check Type of the integrity check to calculate from * uncompressed data. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * \param in Beginning of the input buffer * \param in_size Size of the input buffer * \param[out] out Beginning of the output buffer * \param[out] out_pos The next byte will be written to out[*out_pos]. * *out_pos is updated only if encoding succeeds. * \param out_size Size of the out buffer; the first byte into * which no data is written to is out[out_size]. * * \return Possible lzma_ret values: * - LZMA_OK: Encoding was successful. * - LZMA_BUF_ERROR: Not enough output buffer space. * - LZMA_UNSUPPORTED_CHECK * - LZMA_OPTIONS_ERROR * - LZMA_MEM_ERROR * - LZMA_DATA_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_easy_buffer_encode( uint32_t preset, lzma_check check, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; /** * \brief Initialize .xz Stream encoder using a custom filter chain * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. * \param filters Array of filters terminated with * .id == LZMA_VLI_UNKNOWN. See filters.h for more * information. * \param check Type of the integrity check to calculate from * uncompressed data. 
* * \return Possible lzma_ret values: * - LZMA_OK: Initialization was successful. * - LZMA_MEM_ERROR * - LZMA_UNSUPPORTED_CHECK * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_stream_encoder(lzma_stream *strm, const lzma_filter *filters, lzma_check check) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Calculate approximate memory usage of multithreaded .xz encoder * * Since doing the encoding in threaded mode doesn't affect the memory * requirements of single-threaded decompressor, you can use * lzma_easy_decoder_memusage(options->preset) or * lzma_raw_decoder_memusage(options->filters) to calculate * the decompressor memory requirements. * * \param options Compression options * * \return Number of bytes of memory required for encoding with the * given options. If an error occurs, for example due to * unsupported preset or filter chain, UINT64_MAX is returned. */ extern LZMA_API(uint64_t) lzma_stream_encoder_mt_memusage( const lzma_mt *options) lzma_nothrow lzma_attr_pure; /** * \brief Initialize multithreaded .xz Stream encoder * * This provides the functionality of lzma_easy_encoder() and * lzma_stream_encoder() as a single function for multithreaded use. * * The supported actions for lzma_code() are LZMA_RUN, LZMA_FULL_FLUSH, * LZMA_FULL_BARRIER, and LZMA_FINISH. Support for LZMA_SYNC_FLUSH might be * added in the future. * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. * \param options Pointer to multithreaded compression options * * \return Possible lzma_ret values: * - LZMA_OK * - LZMA_MEM_ERROR * - LZMA_UNSUPPORTED_CHECK * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_stream_encoder_mt( lzma_stream *strm, const lzma_mt *options) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Initialize .lzma encoder (legacy file format) * * The .lzma format is sometimes called the LZMA_Alone format, which is the * reason for the name of this function. 
The .lzma format supports only the * LZMA1 filter. There is no support for integrity checks like CRC32. * * Use this function if and only if you need to create files readable by * legacy LZMA tools such as LZMA Utils 4.32.x. Moving to the .xz format * is strongly recommended. * * The valid action values for lzma_code() are LZMA_RUN and LZMA_FINISH. * No kind of flushing is supported, because the file format doesn't make * it possible. * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. * \param options Pointer to encoder options * * \return Possible lzma_ret values: * - LZMA_OK * - LZMA_MEM_ERROR * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_alone_encoder( lzma_stream *strm, const lzma_options_lzma *options) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Calculate output buffer size for single-call Stream encoder * - * When trying to compress uncompressible data, the encoded size will be + * When trying to compress incompressible data, the encoded size will be * slightly bigger than the input data. This function calculates how much * output buffer space is required to be sure that lzma_stream_buffer_encode() * doesn't return LZMA_BUF_ERROR. * * The calculated value is not exact, but it is guaranteed to be big enough. * The actual maximum output space required may be slightly smaller (up to * about 100 bytes). This should not be a problem in practice. * * If the calculated maximum size doesn't fit into size_t or would make the * Stream grow past LZMA_VLI_MAX (which should never happen in practice), * zero is returned to indicate the error. * * \note The limit calculated by this function applies only to * single-call encoding. Multi-call encoding may (and probably * will) have larger maximum expansion when encoding - * uncompressible data. Currently there is no function to + * incompressible data. Currently there is no function to * calculate the maximum expansion of multi-call encoding. 
* * \param uncompressed_size Size in bytes of the uncompressed * input data * * \return Maximum number of bytes needed to store the compressed data. */ extern LZMA_API(size_t) lzma_stream_buffer_bound(size_t uncompressed_size) lzma_nothrow; /** * \brief Single-call .xz Stream encoder * * \param filters Array of filters terminated with * .id == LZMA_VLI_UNKNOWN. See filters.h for more * information. * \param check Type of the integrity check to calculate from * uncompressed data. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * \param in Beginning of the input buffer * \param in_size Size of the input buffer * \param[out] out Beginning of the output buffer * \param[out] out_pos The next byte will be written to out[*out_pos]. * *out_pos is updated only if encoding succeeds. * \param out_size Size of the out buffer; the first byte into * which no data is written to is out[out_size]. * * \return Possible lzma_ret values: * - LZMA_OK: Encoding was successful. * - LZMA_BUF_ERROR: Not enough output buffer space. * - LZMA_UNSUPPORTED_CHECK * - LZMA_OPTIONS_ERROR * - LZMA_MEM_ERROR * - LZMA_DATA_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_stream_buffer_encode( lzma_filter *filters, lzma_check check, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief MicroLZMA encoder * * The MicroLZMA format is a raw LZMA stream whose first byte (always 0x00) * has been replaced with bitwise-negation of the LZMA properties (lc/lp/pb). * This encoding ensures that the first byte of MicroLZMA stream is never * 0x00. There is no end of payload marker and thus the uncompressed size * must be stored separately. For the best error detection the dictionary * size should be stored separately as well but alternatively one may use * the uncompressed size as the dictionary size when decoding. 
* * With the MicroLZMA encoder, lzma_code() behaves slightly unusually. * The action argument must be LZMA_FINISH and the return value will never be * LZMA_OK. Thus the encoding is always done with a single lzma_code() after * the initialization. The benefit of the combination of initialization * function and lzma_code() is that memory allocations can be re-used for * better performance. * * lzma_code() will try to encode as much input as is possible to fit into * the given output buffer. If not all input can be encoded, the stream will * be finished without encoding all the input. The caller must check both * input and output buffer usage after lzma_code() (total_in and total_out * in lzma_stream can be convenient). Often lzma_code() can fill the output * buffer completely if there is a lot of input, but sometimes a few bytes * may remain unused because the next LZMA symbol would require more space. * * lzma_stream.avail_out must be at least 6. Otherwise LZMA_PROG_ERROR * will be returned. * * The LZMA dictionary should be reasonably low to speed up the encoder * re-initialization. A good value is bigger than the resulting * uncompressed size of most of the output chunks. For example, if output * size is 4 KiB, dictionary size of 32 KiB or 64 KiB is good. If the * data compresses extremely well, even 128 KiB may be useful. * * The MicroLZMA format and this encoder variant were made with the EROFS * file system in mind. This format may be convenient in other embedded * uses too where many small streams are needed. XZ Embedded includes a * decoder for this format. * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. * \param options Pointer to encoder options * * \return Possible lzma_ret values: * - LZMA_STREAM_END: All good. Check the amounts of input used * and output produced. Store the amount of input used * (uncompressed size) as it needs to be known to decompress * the data. 
* - LZMA_OPTIONS_ERROR * - LZMA_MEM_ERROR * - LZMA_PROG_ERROR: In addition to the generic reasons for this * error code, this may also be returned if there isn't enough * output space (6 bytes) to create a valid MicroLZMA stream. */ extern LZMA_API(lzma_ret) lzma_microlzma_encoder( - lzma_stream *strm, const lzma_options_lzma *options); + lzma_stream *strm, const lzma_options_lzma *options) + lzma_nothrow; /************ * Decoding * ************/ /** * This flag makes lzma_code() return LZMA_NO_CHECK if the input stream * being decoded has no integrity check. Note that when used with * lzma_auto_decoder(), all .lzma files will trigger LZMA_NO_CHECK * if LZMA_TELL_NO_CHECK is used. */ #define LZMA_TELL_NO_CHECK UINT32_C(0x01) /** * This flag makes lzma_code() return LZMA_UNSUPPORTED_CHECK if the input * stream has an integrity check, but the type of the integrity check is not * supported by this liblzma version or build. Such files can still be * decoded, but the integrity check cannot be verified. */ #define LZMA_TELL_UNSUPPORTED_CHECK UINT32_C(0x02) /** * This flag makes lzma_code() return LZMA_GET_CHECK as soon as the type * of the integrity check is known. The type can then be got with * lzma_get_check(). */ #define LZMA_TELL_ANY_CHECK UINT32_C(0x04) /** * This flag makes lzma_code() not calculate and verify the integrity check * of the compressed data in .xz files. This means that invalid integrity * check values won't be detected and LZMA_DATA_ERROR won't be returned in * such cases. * * This flag only affects the checks of the compressed data itself; the CRC32 * values in the .xz headers will still be verified normally. * * Don't use this flag unless you know what you are doing. Possible reasons * to use this flag: * * - Trying to recover data from a corrupt .xz file. * * - Speeding up decompression, which matters mostly with SHA-256 * or with files that have compressed extremely well. 
It's recommended * to not use this flag for this purpose unless the file integrity is * verified externally in some other way. * * Support for this flag was added in liblzma 5.1.4beta. */ #define LZMA_IGNORE_CHECK UINT32_C(0x10) /** * This flag enables decoding of concatenated files with file formats that * allow concatenating compressed files as is. From the formats currently * supported by liblzma, only the .xz and .lz formats allow concatenated * files. Concatenated files are not allowed with the legacy .lzma format. * * This flag also affects the usage of the `action' argument for lzma_code(). * When LZMA_CONCATENATED is used, lzma_code() won't return LZMA_STREAM_END * unless LZMA_FINISH is used as `action'. Thus, the application has to set * LZMA_FINISH in the same way as it does when encoding. * * If LZMA_CONCATENATED is not used, the decoders still accept LZMA_FINISH * as `action' for lzma_code(), but the usage of LZMA_FINISH isn't required. */ #define LZMA_CONCATENATED UINT32_C(0x08) /** * This flag makes the threaded decoder report errors (like LZMA_DATA_ERROR) * as soon as they are detected. This saves time when the application has no * interest in a partially decompressed truncated or corrupt file. Note that * due to timing randomness, if the same truncated or corrupt input is * decompressed multiple times with this flag, a different amount of output * may be produced by different runs, and even the error code might vary. * * When using LZMA_FAIL_FAST, it is recommended to use LZMA_FINISH to tell * the decoder when no more input will be coming because it can help fast * detection and reporting of truncated files. Note that in this situation * truncated files might be diagnosed with LZMA_DATA_ERROR instead of * LZMA_OK or LZMA_BUF_ERROR! * * Without this flag the threaded decoder will provide as much output as * possible at first and then report the pending error. 
This default behavior * matches the single-threaded decoder and provides repeatable behavior * with truncated or corrupt input. There are a few special cases where the * behavior can still differ like memory allocation failures (LZMA_MEM_ERROR). * * Single-threaded decoders currently ignore this flag. * * Support for this flag was added in liblzma 5.3.3alpha. Note that in older * versions this flag isn't supported (LZMA_OPTIONS_ERROR) even by functions * that ignore this flag in newer liblzma versions. */ #define LZMA_FAIL_FAST UINT32_C(0x20) /** * \brief Initialize .xz Stream decoder * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. * \param memlimit Memory usage limit as bytes. Use UINT64_MAX * to effectively disable the limiter. liblzma * 5.2.3 and earlier don't allow 0 here and return * LZMA_PROG_ERROR; later versions treat 0 as if 1 * had been specified. * \param flags Bitwise-or of zero or more of the decoder flags: * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, * LZMA_TELL_ANY_CHECK, LZMA_IGNORE_CHECK, * LZMA_CONCATENATED, LZMA_FAIL_FAST * * \return Possible lzma_ret values: * - LZMA_OK: Initialization was successful. * - LZMA_MEM_ERROR: Cannot allocate memory. * - LZMA_OPTIONS_ERROR: Unsupported flags * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_stream_decoder( lzma_stream *strm, uint64_t memlimit, uint32_t flags) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Initialize multithreaded .xz Stream decoder * * The decoder can decode multiple Blocks in parallel. This requires that each * Block Header contains the Compressed Size and Uncompressed size fields * which are added by the multi-threaded encoder, see lzma_stream_encoder_mt(). * * A Stream with one Block will only utilize one thread. A Stream with multiple * Blocks but without size information in Block Headers will be processed in * single-threaded mode in the same way as done by lzma_stream_decoder(). 
* Concatenated Streams are processed one Stream at a time; no inter-Stream * parallelization is done. * * This function behaves like lzma_stream_decoder() when options->threads == 1 * and options->memlimit_threading <= 1. * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. * \param options Pointer to multithreaded compression options * * \return Possible lzma_ret values: * - LZMA_OK: Initialization was successful. * - LZMA_MEM_ERROR: Cannot allocate memory. * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. * - LZMA_OPTIONS_ERROR: Unsupported flags. * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_stream_decoder_mt( lzma_stream *strm, const lzma_mt *options) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Decode .xz, .lzma, and .lz (lzip) files with autodetection * * This decoder autodetects between the .xz, .lzma, and .lz file formats, * and calls lzma_stream_decoder(), lzma_alone_decoder(), or * lzma_lzip_decoder() once the type of the input file has been detected. * * Support for .lz was added in 5.4.0. * * If the flag LZMA_CONCATENATED is used and the input is a .lzma file: * For historical reasons concatenated .lzma files aren't supported. * If there is trailing data after one .lzma stream, lzma_code() will * return LZMA_DATA_ERROR. (lzma_alone_decoder() doesn't have such a check * as it doesn't support any decoder flags. It will return LZMA_STREAM_END * after one .lzma stream.) * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. * \param memlimit Memory usage limit as bytes. Use UINT64_MAX * to effectively disable the limiter. liblzma * 5.2.3 and earlier don't allow 0 here and return * LZMA_PROG_ERROR; later versions treat 0 as if 1 * had been specified. 
* \param flags Bitwise-or of zero or more of the decoder flags: * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, * LZMA_TELL_ANY_CHECK, LZMA_IGNORE_CHECK, * LZMA_CONCATENATED, LZMA_FAIL_FAST * * \return Possible lzma_ret values: * - LZMA_OK: Initialization was successful. * - LZMA_MEM_ERROR: Cannot allocate memory. * - LZMA_OPTIONS_ERROR: Unsupported flags * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_auto_decoder( lzma_stream *strm, uint64_t memlimit, uint32_t flags) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Initialize .lzma decoder (legacy file format) * * Valid `action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. * There is no need to use LZMA_FINISH, but it's allowed because it may * simplify certain types of applications. * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. * \param memlimit Memory usage limit as bytes. Use UINT64_MAX * to effectively disable the limiter. liblzma * 5.2.3 and earlier don't allow 0 here and return * LZMA_PROG_ERROR; later versions treat 0 as if 1 * had been specified. * * \return Possible lzma_ret values: * - LZMA_OK * - LZMA_MEM_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_alone_decoder( lzma_stream *strm, uint64_t memlimit) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Initialize .lz (lzip) decoder (a foreign file format) * * This decoder supports the .lz format version 0 and the unextended .lz * format version 1: * * - Files in the format version 0 were produced by lzip 1.3 and older. * Such files aren't common but may be found from file archives * as a few source packages were released in this format. People * might have old personal files in this format too. Decompression * support for the format version 0 was removed in lzip 1.18. * * - lzip 1.3 added decompression support for .lz format version 1 files. * Compression support was added in lzip 1.4. 
In lzip 1.6 the .lz format * version 1 was extended to support the Sync Flush marker. This extension * is not supported by liblzma. lzma_code() will return LZMA_DATA_ERROR * at the location of the Sync Flush marker. In practice files with * the Sync Flush marker are very rare and thus liblzma can decompress * almost all .lz files. * * Just like with lzma_stream_decoder() for .xz files, LZMA_CONCATENATED * should be used when decompressing normal standalone .lz files. * * The .lz format allows putting non-.lz data at the end of a file after at * least one valid .lz member. That is, one can append custom data at the end * of a .lz file and the decoder is required to ignore it. In liblzma this * is relevant only when LZMA_CONCATENATED is used. In that case lzma_code() * will return LZMA_STREAM_END and leave lzma_stream.next_in pointing to * the first byte of the non-.lz data. An exception to this is if the first * 1-3 bytes of the non-.lz data are identical to the .lz magic bytes * (0x4C, 0x5A, 0x49, 0x50; "LZIP" in US-ASCII). In such a case the 1-3 bytes * will have been ignored by lzma_code(). If one wishes to locate the non-.lz * data reliably, one must ensure that the first byte isn't 0x4C. Actually * one should ensure that none of the first four bytes of trailing data are * equal to the magic bytes because lzip >= 1.20 requires it by default. * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. * \param memlimit Memory usage limit as bytes. Use UINT64_MAX * to effectively disable the limiter. * \param flags Bitwise-or of flags, or zero for no flags. * All decoder flags listed above are supported * although only LZMA_CONCATENATED and (in very rare * cases) LZMA_IGNORE_CHECK are actually useful. * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, * and LZMA_FAIL_FAST do nothing. LZMA_TELL_ANY_CHECK * is supported for consistency only as CRC32 is * always used in the .lz format. 
* * \return Possible lzma_ret values: * - LZMA_OK: Initialization was successful. * - LZMA_MEM_ERROR: Cannot allocate memory. * - LZMA_OPTIONS_ERROR: Unsupported flags * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_lzip_decoder( lzma_stream *strm, uint64_t memlimit, uint32_t flags) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Single-call .xz Stream decoder * * \param memlimit Pointer to how much memory the decoder is allowed * to allocate. The value pointed by this pointer is * modified if and only if LZMA_MEMLIMIT_ERROR is * returned. * \param flags Bitwise-or of zero or more of the decoder flags: * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, * LZMA_IGNORE_CHECK, LZMA_CONCATENATED, * LZMA_FAIL_FAST. Note that LZMA_TELL_ANY_CHECK * is not allowed and will return LZMA_PROG_ERROR. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * \param in Beginning of the input buffer * \param in_pos The next byte will be read from in[*in_pos]. * *in_pos is updated only if decoding succeeds. * \param in_size Size of the input buffer; the first byte that * won't be read is in[in_size]. * \param[out] out Beginning of the output buffer * \param[out] out_pos The next byte will be written to out[*out_pos]. * *out_pos is updated only if decoding succeeds. * \param out_size Size of the out buffer; the first byte into * which no data is written to is out[out_size]. * * \return Possible lzma_ret values: * - LZMA_OK: Decoding was successful. * - LZMA_FORMAT_ERROR * - LZMA_OPTIONS_ERROR * - LZMA_DATA_ERROR * - LZMA_NO_CHECK: This can be returned only if using * the LZMA_TELL_NO_CHECK flag. * - LZMA_UNSUPPORTED_CHECK: This can be returned only if using * the LZMA_TELL_UNSUPPORTED_CHECK flag. * - LZMA_MEM_ERROR * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. * The minimum required memlimit value was stored to *memlimit. * - LZMA_BUF_ERROR: Output buffer was too small. 
* - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_stream_buffer_decode( uint64_t *memlimit, uint32_t flags, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief MicroLZMA decoder * * See lzma_microlzma_encoder() for more information. * * The lzma_code() usage with this decoder is completely normal. The * special behavior of lzma_code() applies to lzma_microlzma_encoder() only. * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. * \param comp_size Compressed size of the MicroLZMA stream. * The caller must somehow know this exactly. * \param uncomp_size Uncompressed size of the MicroLZMA stream. * If the exact uncompressed size isn't known, this * can be set to a value that is at most as big as * the exact uncompressed size would be, but then the * next argument uncomp_size_is_exact must be false. * \param uncomp_size_is_exact * If true, uncomp_size must be exactly correct. * This will improve error detection at the end of * the stream. If the exact uncompressed size isn't * known, this must be false. uncomp_size must still * be at most as big as the exact uncompressed size * is. Setting this to false when the exact size is * known will work but error detection at the end of * the stream will be weaker. * \param dict_size LZMA dictionary size that was used when * compressing the data. It is OK to use a bigger * value too but liblzma will then allocate more * memory than would actually be required and error * detection will be slightly worse. (Note that with * the implementation in XZ Embedded it doesn't * affect the memory usage if one specifies bigger * dictionary than actually required.) 
* * \return Possible lzma_ret values: * - LZMA_OK * - LZMA_MEM_ERROR * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_microlzma_decoder( lzma_stream *strm, uint64_t comp_size, uint64_t uncomp_size, lzma_bool uncomp_size_is_exact, - uint32_t dict_size); + uint32_t dict_size) lzma_nothrow; diff --git a/contrib/xz/src/liblzma/api/lzma/filter.h b/contrib/xz/src/liblzma/api/lzma/filter.h index fa61245544d3..1d887b4f2f43 100644 --- a/contrib/xz/src/liblzma/api/lzma/filter.h +++ b/contrib/xz/src/liblzma/api/lzma/filter.h @@ -1,770 +1,770 @@ /** * \file lzma/filter.h * \brief Common filter related types and functions * \note Never include this file directly. Use instead. */ /* * Author: Lasse Collin * * This file has been put into the public domain. * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL # error Never include this file directly. Use instead. #endif /** * \brief Maximum number of filters in a chain * * A filter chain can have 1-4 filters, of which three are allowed to change * the size of the data. Usually only one or two filters are needed. */ #define LZMA_FILTERS_MAX 4 /** * \brief Filter options * * This structure is used to pass a Filter ID and a pointer to the filter's * options to liblzma. A few functions work with a single lzma_filter * structure, while most functions expect a filter chain. * * A filter chain is indicated with an array of lzma_filter structures. * The array is terminated with .id = LZMA_VLI_UNKNOWN. Thus, the filter * array must have LZMA_FILTERS_MAX + 1 elements (that is, five) to * be able to hold any arbitrary filter chain. This is important when * using lzma_block_header_decode() from block.h, because a filter array * that is too small would make liblzma write past the end of the array. */ typedef struct { /** * \brief Filter ID * * Use constants whose name begin with `LZMA_FILTER_' to specify * different filters. 
In an array of lzma_filter structures, use * LZMA_VLI_UNKNOWN to indicate end of filters. * * \note This is not an enum, because on some systems enums * cannot be 64-bit. */ lzma_vli id; /** * \brief Pointer to filter-specific options structure * * If the filter doesn't need options, set this to NULL. If id is * set to LZMA_VLI_UNKNOWN, options is ignored, and thus * doesn't need to be initialized. */ void *options; } lzma_filter; /** * \brief Test if the given Filter ID is supported for encoding * * \param id Filter ID * * \return lzma_bool: * - true if the Filter ID is supported for encoding by this * liblzma build. * - false otherwise. */ extern LZMA_API(lzma_bool) lzma_filter_encoder_is_supported(lzma_vli id) lzma_nothrow lzma_attr_const; /** * \brief Test if the given Filter ID is supported for decoding * * \param id Filter ID * * \return lzma_bool: * - true if the Filter ID is supported for decoding by this * liblzma build. * - false otherwise. */ extern LZMA_API(lzma_bool) lzma_filter_decoder_is_supported(lzma_vli id) lzma_nothrow lzma_attr_const; /** * \brief Copy the filters array * * Copy the Filter IDs and filter-specific options from src to dest. * Up to LZMA_FILTERS_MAX filters are copied, plus the terminating * .id == LZMA_VLI_UNKNOWN. Thus, dest should have at least * LZMA_FILTERS_MAX + 1 elements space unless the caller knows that * src is smaller than that. * * Unless the filter-specific options is NULL, the Filter ID has to be * supported by liblzma, because liblzma needs to know the size of every * filter-specific options structure. The filter-specific options are not * validated. If options is NULL, any unsupported Filter IDs are copied * without returning an error. * * Old filter-specific options in dest are not freed, so dest doesn't * need to be initialized by the caller in any way. * * If an error occurs, memory possibly already allocated by this function * is always freed.
liblzma versions older than 5.2.7 may modify the dest * array and leave its contents in an undefined state if an error occurs. * liblzma 5.2.7 and newer only modify the dest array when returning LZMA_OK. * * \param src Array of filters terminated with * .id == LZMA_VLI_UNKNOWN. * \param[out] dest Destination filter array * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * * \return Possible lzma_ret values: * - LZMA_OK * - LZMA_MEM_ERROR * - LZMA_OPTIONS_ERROR: Unsupported Filter ID and its options * is not NULL. * - LZMA_PROG_ERROR: src or dest is NULL. */ extern LZMA_API(lzma_ret) lzma_filters_copy( const lzma_filter *src, lzma_filter *dest, const lzma_allocator *allocator) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Free the options in the array of lzma_filter structures * * This frees the filter chain options. The filters array itself is not freed. * * The filters array must have at most LZMA_FILTERS_MAX + 1 elements * including the terminating element which must have .id = LZMA_VLI_UNKNOWN. * For all elements before the terminating element: * - options will be freed using the given lzma_allocator or, * if allocator is NULL, using free(). * - options will be set to NULL. * - id will be set to LZMA_VLI_UNKNOWN. * * If filters is NULL, this does nothing. Again, this never frees the * filters array itself. * * \param filters Array of filters terminated with * .id == LZMA_VLI_UNKNOWN. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). */ extern LZMA_API(void) lzma_filters_free( lzma_filter *filters, const lzma_allocator *allocator) lzma_nothrow; /** * \brief Calculate approximate memory requirements for raw encoder * * This function can be used to calculate the memory requirements for * Block and Stream encoders too because Block and Stream encoders don't * need significantly more memory than raw encoder. 
* * \param filters Array of filters terminated with * .id == LZMA_VLI_UNKNOWN. * * \return Number of bytes of memory required for the given * filter chain when encoding or UINT64_MAX on error. */ extern LZMA_API(uint64_t) lzma_raw_encoder_memusage(const lzma_filter *filters) lzma_nothrow lzma_attr_pure; /** * \brief Calculate approximate memory requirements for raw decoder * * This function can be used to calculate the memory requirements for * Block and Stream decoders too because Block and Stream decoders don't * need significantly more memory than raw decoder. * * \param filters Array of filters terminated with * .id == LZMA_VLI_UNKNOWN. * * \return Number of bytes of memory required for the given * filter chain when decoding or UINT64_MAX on error. */ extern LZMA_API(uint64_t) lzma_raw_decoder_memusage(const lzma_filter *filters) lzma_nothrow lzma_attr_pure; /** * \brief Initialize raw encoder * * This function may be useful when implementing custom file formats. * * The `action' with lzma_code() can be LZMA_RUN, LZMA_SYNC_FLUSH (if the * filter chain supports it), or LZMA_FINISH. * * \param strm Pointer to lzma_stream that is at least * initialized with LZMA_STREAM_INIT. * \param filters Array of filters terminated with * .id == LZMA_VLI_UNKNOWN. * * \return Possible lzma_ret values: * - LZMA_OK * - LZMA_MEM_ERROR * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_raw_encoder( lzma_stream *strm, const lzma_filter *filters) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Initialize raw decoder * * The initialization of raw decoder goes similarly to raw encoder. * * The `action' with lzma_code() can be LZMA_RUN or LZMA_FINISH. Using * LZMA_FINISH is not required, it is supported just for convenience. * * \param strm Pointer to lzma_stream that is at least * initialized with LZMA_STREAM_INIT. * \param filters Array of filters terminated with * .id == LZMA_VLI_UNKNOWN. 
* * \return Possible lzma_ret values: * - LZMA_OK * - LZMA_MEM_ERROR * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_raw_decoder( lzma_stream *strm, const lzma_filter *filters) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Update the filter chain in the encoder * * This function may be called after lzma_code() has returned LZMA_STREAM_END * when LZMA_FULL_BARRIER, LZMA_FULL_FLUSH, or LZMA_SYNC_FLUSH was used: * * - After LZMA_FULL_BARRIER or LZMA_FULL_FLUSH: Single-threaded .xz Stream * encoder (lzma_stream_encoder()) and (since liblzma 5.4.0) multi-threaded * Stream encoder (lzma_stream_encoder_mt()) allow setting a new filter * chain to be used for the next Block(s). * * - After LZMA_SYNC_FLUSH: Raw encoder (lzma_raw_encoder()), - * Block encocder (lzma_block_encoder()), and single-threaded .xz Stream + * Block encoder (lzma_block_encoder()), and single-threaded .xz Stream * encoder (lzma_stream_encoder()) allow changing certain filter-specific * options in the middle of encoding. The actual filters in the chain * (Filter IDs) must not be changed! Currently only the lc, lp, and pb * options of LZMA2 (not LZMA1) can be changed this way. * * - In the future some filters might allow changing some of their options * without any barrier or flushing but currently such filters don't exist. * * This function may also be called when no data has been compressed yet * although this is rarely useful. In that case, this function will behave * as if LZMA_FULL_FLUSH (Stream encoders) or LZMA_SYNC_FLUSH (Raw or Block * encoder) had been used right before calling this function. * * \param strm Pointer to lzma_stream that is at least * initialized with LZMA_STREAM_INIT. * \param filters Array of filters terminated with * .id == LZMA_VLI_UNKNOWN. 
* * \return Possible lzma_ret values: * - LZMA_OK * - LZMA_MEM_ERROR * - LZMA_MEMLIMIT_ERROR * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_filters_update( lzma_stream *strm, const lzma_filter *filters) lzma_nothrow; /** * \brief Single-call raw encoder * * \note There is no function to calculate how big output buffer * would surely be big enough. (lzma_stream_buffer_bound() * works only for lzma_stream_buffer_encode(); raw encoder * won't necessarily meet that bound.) * * \param filters Array of filters terminated with * .id == LZMA_VLI_UNKNOWN. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * \param in Beginning of the input buffer * \param in_size Size of the input buffer * \param[out] out Beginning of the output buffer * \param[out] out_pos The next byte will be written to out[*out_pos]. * *out_pos is updated only if encoding succeeds. * \param out_size Size of the out buffer; the first byte into * which no data is written to is out[out_size]. * * \return Possible lzma_ret values: * - LZMA_OK: Encoding was successful. * - LZMA_BUF_ERROR: Not enough output buffer space. * - LZMA_OPTIONS_ERROR * - LZMA_MEM_ERROR * - LZMA_DATA_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_raw_buffer_encode( const lzma_filter *filters, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; /** * \brief Single-call raw decoder * * \param filters Array of filters terminated with * .id == LZMA_VLI_UNKNOWN. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * \param in Beginning of the input buffer * \param in_pos The next byte will be read from in[*in_pos]. * *in_pos is updated only if decoding succeeds. * \param in_size Size of the input buffer; the first byte that * won't be read is in[in_size]. 
* \param[out] out Beginning of the output buffer * \param[out] out_pos The next byte will be written to out[*out_pos]. * *out_pos is updated only if decoding succeeds. * \param out_size Size of the out buffer; the first byte into * which no data is written to is out[out_size]. * * \return Possible lzma_ret values: * - LZMA_OK: Decoding was successful. * - LZMA_BUF_ERROR: Not enough output buffer space. * - LZMA_OPTIONS_ERROR * - LZMA_MEM_ERROR * - LZMA_DATA_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_raw_buffer_decode( const lzma_filter *filters, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; /** * \brief Get the size of the Filter Properties field * * This function may be useful when implementing custom file formats * using the raw encoder and decoder. * * \note This function validates the Filter ID, but does not * necessarily validate the options. Thus, it is possible * that this returns LZMA_OK while the following call to * lzma_properties_encode() returns LZMA_OPTIONS_ERROR. * * \param[out] size Pointer to uint32_t to hold the size of the properties * \param filter Filter ID and options (the size of the properties may * vary depending on the options) * * \return Possible lzma_ret values: * - LZMA_OK * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_properties_size( uint32_t *size, const lzma_filter *filter) lzma_nothrow; /** * \brief Encode the Filter Properties field * * \note Even this function won't validate more options than actually * necessary. Thus, it is possible that encoding the properties * succeeds but using the same options to initialize the encoder * will fail. * * \note If lzma_properties_size() indicated that the size * of the Filter Properties field is zero, calling * lzma_properties_encode() is not required, but it * won't do any harm either.
* * \param filter Filter ID and options * \param[out] props Buffer to hold the encoded options. The size of * the buffer must have been already determined with * lzma_properties_size(). * * \return Possible lzma_ret values: * - LZMA_OK * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_properties_encode( const lzma_filter *filter, uint8_t *props) lzma_nothrow; /** * \brief Decode the Filter Properties field * * \param filter filter->id must have been set to the correct * Filter ID. filter->options doesn't need to be * initialized (it's not freed by this function). The * decoded options will be stored in filter->options; * it's application's responsibility to free it when * appropriate. filter->options is set to NULL if * there are no properties or if an error occurs. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and, * in case of an error, also free(). * \param props Input buffer containing the properties. * \param props_size Size of the properties. This must be the exact * size; giving too much or too little input will * return LZMA_OPTIONS_ERROR. * * \return Possible lzma_ret values: * - LZMA_OK * - LZMA_OPTIONS_ERROR * - LZMA_MEM_ERROR */ extern LZMA_API(lzma_ret) lzma_properties_decode( lzma_filter *filter, const lzma_allocator *allocator, const uint8_t *props, size_t props_size) lzma_nothrow; /** * \brief Calculate encoded size of a Filter Flags field * * Knowing the size of Filter Flags is useful to know when allocating * memory to hold the encoded Filter Flags. * * \note If you need to calculate size of List of Filter Flags, * you need to loop over every lzma_filter entry. * * \param[out] size Pointer to integer to hold the calculated size * \param filter Filter ID and associated options whose encoded * size is to be calculated * * \return Possible lzma_ret values: * - LZMA_OK: *size set successfully.
Note that this doesn't * guarantee that filter->options is valid, thus * lzma_filter_flags_encode() may still fail. * - LZMA_OPTIONS_ERROR: Unknown Filter ID or unsupported options. * - LZMA_PROG_ERROR: Invalid options */ extern LZMA_API(lzma_ret) lzma_filter_flags_size( uint32_t *size, const lzma_filter *filter) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Encode Filter Flags into given buffer * * In contrast to some functions, this doesn't allocate the needed buffer. * This is due to how this function is used internally by liblzma. * * \param filter Filter ID and options to be encoded * \param[out] out Beginning of the output buffer * \param[out] out_pos out[*out_pos] is the next write position. This * is updated by the encoder. * \param out_size out[out_size] is the first byte to not write. * * \return Possible lzma_ret values: * - LZMA_OK: Encoding was successful. * - LZMA_OPTIONS_ERROR: Invalid or unsupported options. * - LZMA_PROG_ERROR: Invalid options or not enough output * buffer space (you should have checked it with * lzma_filter_flags_size()). */ extern LZMA_API(lzma_ret) lzma_filter_flags_encode(const lzma_filter *filter, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Decode Filter Flags from given buffer * * The decoded result is stored into *filter. The old value of * filter->options is not free()d. If anything other than LZMA_OK * is returned, filter->options is set to NULL. * * \param[out] filter Destination filter. The decoded Filter ID will * be stored in filter->id. If options are needed * they will be allocated and the pointer will be * stored in filter->options. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * \param in Beginning of the input buffer * \param[out] in_pos The next byte will be read from in[*in_pos]. * *in_pos is updated only if decoding succeeds. 
* \param in_size Size of the input buffer; the first byte that * won't be read is in[in_size]. * * \return Possible lzma_ret values: * - LZMA_OK * - LZMA_OPTIONS_ERROR * - LZMA_MEM_ERROR * - LZMA_DATA_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_filter_flags_decode( lzma_filter *filter, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size) lzma_nothrow lzma_attr_warn_unused_result; /*********** * Strings * ***********/ /** * \brief Allow or show all filters * * By default only the filters supported in the .xz format are accepted by * lzma_str_to_filters() or shown by lzma_str_list_filters(). */ #define LZMA_STR_ALL_FILTERS UINT32_C(0x01) /** * \brief Do not validate the filter chain in lzma_str_to_filters() * * By default lzma_str_to_filters() can return an error if the filter chain * as a whole isn't usable in the .xz format or in the raw encoder or decoder. * With this flag, this validation is skipped. This flag doesn't affect the * handling of the individual filter options. To allow non-.xz filters also * LZMA_STR_ALL_FILTERS is needed. */ #define LZMA_STR_NO_VALIDATION UINT32_C(0x02) /** * \brief Stringify encoder options * * Show the filter-specific options that the encoder will use. * This may be useful for verbose diagnostic messages. * * Note that if options were decoded from .xz headers then the encoder options * may be undefined. This flag shouldn't be used in such a situation. */ #define LZMA_STR_ENCODER UINT32_C(0x10) /** * \brief Stringify decoder options * * Show the filter-specific options that the decoder will use. * This may be useful for showing what filter options were decoded * from file headers. */ #define LZMA_STR_DECODER UINT32_C(0x20) /** * \brief Produce xz-compatible getopt_long() syntax * * That is, "delta:dist=2 lzma2:dict=4MiB,pb=1,lp=1" becomes * "--delta=dist=2 --lzma2=dict=4MiB,pb=1,lp=1".
* * This syntax is compatible with xz 5.0.0 as long as the filters and * their options are supported too. */ #define LZMA_STR_GETOPT_LONG UINT32_C(0x40) /** * \brief Use two dashes "--" instead of a space to separate filters * * That is, "delta:dist=2 lzma2:pb=1,lp=1" becomes * "delta:dist=2--lzma2:pb=1,lp=1". This looks slightly odd but this * kind of strings should be usable on the command line without quoting. * However, it is possible that future versions with new filter options * might produce strings that require shell quoting anyway as the exact * set of possible characters isn't frozen for now. * * It is guaranteed that the single quote (') will never be used in * filter chain strings (even if LZMA_STR_NO_SPACES isn't used). */ #define LZMA_STR_NO_SPACES UINT32_C(0x80) /** * \brief Convert a string to a filter chain * * This tries to make it easier to write applications that allow users * to set custom compression options. This only handles the filter * configuration (including presets) but not the number of threads, * block size, check type, or memory limits. * * The input string can be either a preset or a filter chain. Presets * begin with a digit 0-9 and may be followed by zero or more flags * which are lower-case letters. Currently only "e" is supported, matching * LZMA_PRESET_EXTREME. For partial xz command line syntax compatibility, * a preset string may start with a single dash "-". * * A filter chain consists of one or more "filtername:opt1=value1,opt2=value2" * strings separated by one or more spaces. Leading and trailing spaces are * ignored. All names and values must be lower-case. Extra commas in the * option list are ignored. The order of filters is significant: when * encoding, the uncompressed input data goes to the leftmost filter first. * Normally "lzma2" is the last filter in the chain. 
* * If one wishes to avoid spaces, for example, to avoid shell quoting, * it is possible to use two dashes "--" instead of spaces to separate * the filters. * * For xz command line compatibility, each filter may be prefixed with * two dashes "--" and the colon ":" separating the filter name from * the options may be replaced with an equals sign "=". * * By default, only filters that can be used in the .xz format are accepted. * To allow all filters (LZMA1) use the flag LZMA_STR_ALL_FILTERS. * * By default, very basic validation is done for the filter chain as a whole, * for example, that LZMA2 is only used as the last filter in the chain. * The validation isn't perfect though and it's possible that this function * succeeds but using the filter chain for encoding or decoding will still * result in LZMA_OPTIONS_ERROR. To disable this validation, use the flag * LZMA_STR_NO_VALIDATION. * * The available filter names and their options are available via * lzma_str_list_filters(). See the xz man page for the description * of filter names and options. * * For command line applications, below is an example how an error message * can be displayed. Note the use of an empty string for the field width. * If "^" was used there it would create an off-by-one error except at * the very beginning of the line. * * \code{.c} * const char *str = ...; // From user * lzma_filter filters[LZMA_FILTERS_MAX + 1]; * int pos; * const char *msg = lzma_str_to_filters(str, &pos, filters, 0, NULL); * if (msg != NULL) { * printf("%s: Error in XZ compression options:\n", argv[0]); * printf("%s: %s\n", argv[0], str); * printf("%s: %*s^\n", argv[0], pos, ""); * printf("%s: %s\n", argv[0], msg); * } * \endcode * * \param str User-supplied string describing a preset or * a filter chain. If a default value is needed and * you don't know what would be good, use "6" since * that is the default preset in xz too.
* \param[out] error_pos If this isn't NULL, this value will be set on * both success and on all errors. This tells the * location of the error in the string. This is * an int to make it straightforward to use this * as printf() field width. The value is guaranteed * to be in the range [0, INT_MAX] even if strlen(str) * somehow was greater than INT_MAX. * \param[out] filters An array of lzma_filter structures. There must * be LZMA_FILTERS_MAX + 1 (that is, five) elements * in the array. The old contents are ignored so it * doesn't need to be initialized. This array is * modified only if this function returns NULL. * Once the allocated filter options are no longer * needed, lzma_filters_free() can be used to free the * options (it doesn't free the filters array itself). * \param flags Bitwise-or of zero or more of the flags * LZMA_STR_ALL_FILTERS and LZMA_STR_NO_VALIDATION. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * * \return On success, NULL is returned. On error, a statically-allocated * error message is returned which together with the error_pos * should give some idea what is wrong. */ extern LZMA_API(const char *) lzma_str_to_filters( const char *str, int *error_pos, lzma_filter *filters, uint32_t flags, const lzma_allocator *allocator) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Convert a filter chain to a string * * Use cases: * * - Verbose output showing the full encoder options to the user * (use LZMA_STR_ENCODER in flags) * * - Showing the filters and options that are required to decode a file * (use LZMA_STR_DECODER in flags) * * - Showing the filter names without any options in informational messages * where the technical details aren't important (no flags). In this case * the .options in the filters array are ignored and may be NULL even if * a filter has a mandatory options structure. 
* * Note that even if the filter chain was specified using a preset, * the resulting filter chain isn't reversed to a preset. So if you * specify "6" to lzma_str_to_filters() then lzma_str_from_filters() * will produce a string containing "lzma2". * * \param[out] str On success *str will be set to point to an * allocated string describing the given filter * chain. Old value is ignored. On error *str is * always set to NULL. * \param filters Array of filters terminated with * .id == LZMA_VLI_UNKNOWN. * \param flags Bitwise-or of zero or more of the flags * LZMA_STR_ENCODER, LZMA_STR_DECODER, * LZMA_STR_GETOPT_LONG, and LZMA_STR_NO_SPACES. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * * \return Possible lzma_ret values: * - LZMA_OK * - LZMA_OPTIONS_ERROR: Empty filter chain * (filters[0].id == LZMA_VLI_UNKNOWN) or the filter chain * includes a Filter ID that is not supported by this function. * - LZMA_MEM_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_str_from_filters( char **str, const lzma_filter *filters, uint32_t flags, const lzma_allocator *allocator) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief List available filters and/or their options (for help message) * * If a filter_id is given then only one line is created which contains the * filter name. If LZMA_STR_ENCODER or LZMA_STR_DECODER is used then the - * options required for encoding or decoding are listed on the same line too. + * options read by the encoder or decoder are printed on the same line. * * If filter_id is LZMA_VLI_UNKNOWN then all supported .xz-compatible filters * are listed: * * - If neither LZMA_STR_ENCODER nor LZMA_STR_DECODER is used then * the supported filter names are listed on a single line separated * by spaces. * * - If LZMA_STR_ENCODER or LZMA_STR_DECODER is used then filters and * the supported options are listed one filter per line. There won't * be a newline after the last filter. 
* * - If LZMA_STR_ALL_FILTERS is used then the list will include also * those filters that cannot be used in the .xz format (LZMA1). * * \param str On success *str will be set to point to an * allocated string listing the filters and options. * Old value is ignored. On error *str is always set * to NULL. * \param filter_id Filter ID or LZMA_VLI_UNKNOWN. * \param flags Bitwise-or of zero or more of the flags * LZMA_STR_ALL_FILTERS, LZMA_STR_ENCODER, * LZMA_STR_DECODER, and LZMA_STR_GETOPT_LONG. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * * \return Possible lzma_ret values: * - LZMA_OK * - LZMA_OPTIONS_ERROR: Unsupported filter_id or flags * - LZMA_MEM_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_str_list_filters( char **str, lzma_vli filter_id, uint32_t flags, const lzma_allocator *allocator) lzma_nothrow lzma_attr_warn_unused_result; diff --git a/contrib/xz/src/liblzma/api/lzma/lzma12.h b/contrib/xz/src/liblzma/api/lzma/lzma12.h index ad4ce7b1d3bf..8ef6ea5b5010 100644 --- a/contrib/xz/src/liblzma/api/lzma/lzma12.h +++ b/contrib/xz/src/liblzma/api/lzma/lzma12.h @@ -1,569 +1,569 @@ /** * \file lzma/lzma12.h * \brief LZMA1 and LZMA2 filters * \note Never include this file directly. Use instead. */ /* * Author: Lasse Collin * * This file has been put into the public domain. * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL # error Never include this file directly. Use instead. #endif /** * \brief LZMA1 Filter ID (for raw encoder/decoder only, not in .xz) * * LZMA1 is the very same thing as what was called just LZMA in LZMA Utils, * 7-Zip, and LZMA SDK. It's called LZMA1 here to prevent developers from * accidentally using LZMA when they actually want LZMA2. 
*/ #define LZMA_FILTER_LZMA1 LZMA_VLI_C(0x4000000000000001) /** * \brief LZMA1 Filter ID with extended options (for raw encoder/decoder) * * This is like LZMA_FILTER_LZMA1 but with this ID a few extra options * are supported in the lzma_options_lzma structure: * * - A flag to tell the encoder if the end of payload marker (EOPM) alias * end of stream (EOS) marker must be written at the end of the stream. * In contrast, LZMA_FILTER_LZMA1 always writes the end marker. * * - Decoder needs to be told the uncompressed size of the stream * or that it is unknown (using the special value UINT64_MAX). * If the size is known, a flag can be set to allow the presence of * the end marker anyway. In contrast, LZMA_FILTER_LZMA1 always * behaves as if the uncompressed size was unknown. * * This allows handling file formats where LZMA1 streams are used but where * the end marker isn't allowed or where it might not (always) be present. * This extended LZMA1 functionality is provided as a Filter ID for raw * encoder and decoder instead of adding new encoder and decoder initialization * functions because this way it is possible to also use extra filters, * for example, LZMA_FILTER_X86 in a filter chain with LZMA_FILTER_LZMA1EXT, * which might be needed to handle some file formats. */ #define LZMA_FILTER_LZMA1EXT LZMA_VLI_C(0x4000000000000002) /** * \brief LZMA2 Filter ID * * Usually you want this instead of LZMA1. Compared to LZMA1, LZMA2 adds * support for LZMA_SYNC_FLUSH, uncompressed chunks (smaller expansion - * when trying to compress uncompressible data), possibility to change + * when trying to compress incompressible data), possibility to change * lc/lp/pb in the middle of encoding, and some other internal improvements. */ #define LZMA_FILTER_LZMA2 LZMA_VLI_C(0x21) /** * \brief Match finders * * Match finder has major effect on both speed and compression ratio. * Usually hash chains are faster than binary trees. 
* * If you will use LZMA_SYNC_FLUSH often, the hash chains may be a better * choice, because binary trees get much higher compression ratio penalty * with LZMA_SYNC_FLUSH. * * The memory usage formulas are only rough estimates, which are closest to * reality when dict_size is a power of two. The formulas are more complex * in reality, and can also change a little between liblzma versions. Use * lzma_raw_encoder_memusage() to get more accurate estimate of memory usage. */ typedef enum { LZMA_MF_HC3 = 0x03, /**< * \brief Hash Chain with 2- and 3-byte hashing * * Minimum nice_len: 3 * * Memory usage: * - dict_size <= 16 MiB: dict_size * 7.5 * - dict_size > 16 MiB: dict_size * 5.5 + 64 MiB */ LZMA_MF_HC4 = 0x04, /**< * \brief Hash Chain with 2-, 3-, and 4-byte hashing * * Minimum nice_len: 4 * * Memory usage: * - dict_size <= 32 MiB: dict_size * 7.5 * - dict_size > 32 MiB: dict_size * 6.5 */ LZMA_MF_BT2 = 0x12, /**< * \brief Binary Tree with 2-byte hashing * * Minimum nice_len: 2 * * Memory usage: dict_size * 9.5 */ LZMA_MF_BT3 = 0x13, /**< * \brief Binary Tree with 2- and 3-byte hashing * * Minimum nice_len: 3 * * Memory usage: * - dict_size <= 16 MiB: dict_size * 11.5 * - dict_size > 16 MiB: dict_size * 9.5 + 64 MiB */ LZMA_MF_BT4 = 0x14 /**< * \brief Binary Tree with 2-, 3-, and 4-byte hashing * * Minimum nice_len: 4 * * Memory usage: * - dict_size <= 32 MiB: dict_size * 11.5 * - dict_size > 32 MiB: dict_size * 10.5 */ } lzma_match_finder; /** * \brief Test if given match finder is supported * * It is safe to call this with a value that isn't listed in * lzma_match_finder enumeration; the return value will be false. * * There is no way to list which match finders are available in this * particular liblzma version and build. It would be useless, because * a new match finder, which the application developer wasn't aware, * could require giving additional options to the encoder that the older * match finders don't need. 
* * \param match_finder Match finder ID * * \return lzma_bool: * - true if the match finder is supported by this liblzma build. * - false otherwise. */ extern LZMA_API(lzma_bool) lzma_mf_is_supported(lzma_match_finder match_finder) lzma_nothrow lzma_attr_const; /** * \brief Compression modes * * This selects the function used to analyze the data produced by the match * finder. */ typedef enum { LZMA_MODE_FAST = 1, /**< * \brief Fast compression * * Fast mode is usually at its best when combined with * a hash chain match finder. */ LZMA_MODE_NORMAL = 2 /**< * \brief Normal compression * * This is usually notably slower than fast mode. Use this * together with binary tree match finders to expose the * full potential of the LZMA1 or LZMA2 encoder. */ } lzma_mode; /** * \brief Test if given compression mode is supported * * It is safe to call this with a value that isn't listed in lzma_mode * enumeration; the return value will be false. * * There is no way to list which modes are available in this particular * liblzma version and build. It would be useless, because a new compression * mode, which the application developer wasn't aware, could require giving * additional options to the encoder that the older modes don't need. * * \param mode Mode ID. * * \return lzma_bool: * - true if the compression mode is supported by this liblzma * build. * - false otherwise. */ extern LZMA_API(lzma_bool) lzma_mode_is_supported(lzma_mode mode) lzma_nothrow lzma_attr_const; /** * \brief Options specific to the LZMA1 and LZMA2 filters * * Since LZMA1 and LZMA2 share most of the code, it's simplest to share * the options structure too. For encoding, all but the reserved variables * need to be initialized unless specifically mentioned otherwise. * lzma_lzma_preset() can be used to get a good starting point. * * For raw decoding, both LZMA1 and LZMA2 need dict_size, preset_dict, and * preset_dict_size (if preset_dict != NULL). LZMA1 needs also lc, lp, and pb. 
*/ typedef struct { /** * \brief Dictionary size in bytes * * Dictionary size indicates how many bytes of the recently processed * uncompressed data is kept in memory. One method to reduce size of * the uncompressed data is to store distance-length pairs, which * indicate what data to repeat from the dictionary buffer. Thus, * the bigger the dictionary, the better the compression ratio * usually is. * * Maximum size of the dictionary depends on multiple things: * - Memory usage limit * - Available address space (not a problem on 64-bit systems) * - Selected match finder (encoder only) * * Currently the maximum dictionary size for encoding is 1.5 GiB * (i.e. (UINT32_C(1) << 30) + (UINT32_C(1) << 29)) even on 64-bit * systems for certain match finder implementation reasons. In the * future, there may be match finders that support bigger * dictionaries. * * Decoder already supports dictionaries up to 4 GiB - 1 B (i.e. * UINT32_MAX), so increasing the maximum dictionary size of the * encoder won't cause problems for old decoders. * * Because extremely small dictionaries sizes would have unneeded * overhead in the decoder, the minimum dictionary size is 4096 bytes. * * \note When decoding, too big dictionary does no other harm * than wasting memory. */ uint32_t dict_size; # define LZMA_DICT_SIZE_MIN UINT32_C(4096) # define LZMA_DICT_SIZE_DEFAULT (UINT32_C(1) << 23) /** * \brief Pointer to an initial dictionary * * It is possible to initialize the LZ77 history window using * a preset dictionary. It is useful when compressing many * similar, relatively small chunks of data independently from * each other. The preset dictionary should contain typical * strings that occur in the files being compressed. The most * probable strings should be near the end of the preset dictionary. * * This feature should be used only in special situations. For * now, it works correctly only with raw encoding and decoding. 
* Currently none of the container formats supported by * liblzma allow preset dictionary when decoding, thus if * you create a .xz or .lzma file with preset dictionary, it * cannot be decoded with the regular decoder functions. In the * future, the .xz format will likely get support for preset * dictionary though. */ const uint8_t *preset_dict; /** * \brief Size of the preset dictionary * * Specifies the size of the preset dictionary. If the size is * bigger than dict_size, only the last dict_size bytes are * processed. * * This variable is read only when preset_dict is not NULL. * If preset_dict is not NULL but preset_dict_size is zero, * no preset dictionary is used (identical to only setting * preset_dict to NULL). */ uint32_t preset_dict_size; /** * \brief Number of literal context bits * * How many of the highest bits of the previous uncompressed * eight-bit byte (also known as `literal') are taken into * account when predicting the bits of the next literal. * * E.g. in typical English text, an upper-case letter is * often followed by a lower-case letter, and a lower-case * letter is usually followed by another lower-case letter. * In the US-ASCII character set, the highest three bits are 010 * for upper-case letters and 011 for lower-case letters. * When lc is at least 3, the literal coding can take advantage of * this property in the uncompressed data. * * There is a limit that applies to literal context bits and literal * position bits together: lc + lp <= 4. Without this limit the * decoding could become very slow, which could have security related * results in some cases like email servers doing virus scanning. * This limit also simplifies the internal implementation in liblzma. * * There may be LZMA1 streams that have lc + lp > 4 (maximum possible * lc would be 8). It is not possible to decode such streams with * liblzma. 
*/ uint32_t lc; # define LZMA_LCLP_MIN 0 # define LZMA_LCLP_MAX 4 # define LZMA_LC_DEFAULT 3 /** * \brief Number of literal position bits * * lp affects what kind of alignment in the uncompressed data is * assumed when encoding literals. A literal is a single 8-bit byte. * See pb below for more information about alignment. */ uint32_t lp; # define LZMA_LP_DEFAULT 0 /** * \brief Number of position bits * * pb affects what kind of alignment in the uncompressed data is * assumed in general. The default means four-byte alignment * (2^ pb =2^2=4), which is often a good choice when there's * no better guess. * * When the alignment is known, setting pb accordingly may reduce * the file size a little. E.g. with text files having one-byte * alignment (US-ASCII, ISO-8859-*, UTF-8), setting pb=0 can * improve compression slightly. For UTF-16 text, pb=1 is a good * choice. If the alignment is an odd number like 3 bytes, pb=0 * might be the best choice. * * Even though the assumed alignment can be adjusted with pb and * lp, LZMA1 and LZMA2 still slightly favor 16-byte alignment. * It might be worth taking into account when designing file formats * that are likely to be often compressed with LZMA1 or LZMA2. */ uint32_t pb; # define LZMA_PB_MIN 0 # define LZMA_PB_MAX 4 # define LZMA_PB_DEFAULT 2 /** Compression mode */ lzma_mode mode; /** * \brief Nice length of a match * * This determines how many bytes the encoder compares from the match * candidates when looking for the best match. Once a match of at * least nice_len bytes long is found, the encoder stops looking for * better candidates and encodes the match. (Naturally, if the found * match is actually longer than nice_len, the actual length is * encoded; it's not truncated to nice_len.) * * Bigger values usually increase the compression ratio and * compression time. For most files, 32 to 128 is a good value, * which gives very good compression ratio at good speed. * * The exact minimum value depends on the match finder. 
The maximum * is 273, which is the maximum length of a match that LZMA1 and * LZMA2 can encode. */ uint32_t nice_len; /** Match finder ID */ lzma_match_finder mf; /** * \brief Maximum search depth in the match finder * * For every input byte, match finder searches through the hash chain * or binary tree in a loop, each iteration going one step deeper in * the chain or tree. The searching stops if * - a match of at least nice_len bytes long is found; * - all match candidates from the hash chain or binary tree have * been checked; or * - maximum search depth is reached. * * Maximum search depth is needed to prevent the match finder from * wasting too much time in case there are lots of short match * candidates. On the other hand, stopping the search before all * candidates have been checked can reduce compression ratio. * * Setting depth to zero tells liblzma to use an automatic default * value, that depends on the selected match finder and nice_len. * The default is in the range [4, 200] or so (it may vary between * liblzma versions). * * Using a bigger depth value than the default can increase * compression ratio in some cases. There is no strict maximum value, * but high values (thousands or millions) should be used with care: * the encoder could remain fast enough with typical input, but * malicious input could cause the match finder to slow down * dramatically, possibly creating a denial of service attack. */ uint32_t depth; /** * \brief For LZMA_FILTER_LZMA1EXT: Extended flags * * This is used only with LZMA_FILTER_LZMA1EXT. * * Currently only one flag is supported, LZMA_LZMA1EXT_ALLOW_EOPM: * * - Encoder: If the flag is set, then end marker is written just * like it is with LZMA_FILTER_LZMA1. Without this flag the * end marker isn't written and the application has to store * the uncompressed size somewhere outside the compressed stream. 
- * To decompress streams without the end marker, the appliation + * To decompress streams without the end marker, the application * has to set the correct uncompressed size in ext_size_low and * ext_size_high. * * - Decoder: If the uncompressed size in ext_size_low and * ext_size_high is set to the special value UINT64_MAX * (indicating unknown uncompressed size) then this flag is * ignored and the end marker must always be present, that is, * the behavior is identical to LZMA_FILTER_LZMA1. * * Otherwise, if this flag isn't set, then the input stream * must not have the end marker; if the end marker is detected * then it will result in LZMA_DATA_ERROR. This is useful when * it is known that the stream must not have the end marker and * strict validation is wanted. * * If this flag is set, then it is autodetected if the end marker * is present after the specified number of uncompressed bytes * has been decompressed (ext_size_low and ext_size_high). The * end marker isn't allowed in any other position. This behavior * is useful when uncompressed size is known but the end marker * may or may not be present. This is the case, for example, * in .7z files (valid .7z files that have the end marker in * LZMA1 streams are rare but they do exist). */ uint32_t ext_flags; # define LZMA_LZMA1EXT_ALLOW_EOPM UINT32_C(0x01) /** * \brief For LZMA_FILTER_LZMA1EXT: Uncompressed size (low bits) * * The 64-bit uncompressed size is needed for decompression with * LZMA_FILTER_LZMA1EXT. The size is ignored by the encoder. * * The special value UINT64_MAX indicates that the uncompressed size * is unknown and that the end of payload marker (also known as * end of stream marker) must be present to indicate the end of * the LZMA1 stream. Any other value indicates the expected * uncompressed size of the LZMA1 stream. 
(If LZMA1 was used together * with filters that change the size of the data then the uncompressed * size of the LZMA1 stream could be different than the final * uncompressed size of the filtered stream.) * * ext_size_low holds the least significant 32 bits of the * uncompressed size. The most significant 32 bits must be set * in ext_size_high. The macro lzma_set_ext_size(opt_lzma, u64size) * can be used to set these members. * * The 64-bit uncompressed size is split into two uint32_t variables * because there were no reserved uint64_t members and using the * same options structure for LZMA_FILTER_LZMA1, LZMA_FILTER_LZMA1EXT, * and LZMA_FILTER_LZMA2 was otherwise more convenient than having * a new options structure for LZMA_FILTER_LZMA1EXT. (Replacing two * uint32_t members with one uint64_t changes the ABI on some systems * as the alignment of this struct can increase from 4 bytes to 8.) */ uint32_t ext_size_low; /** * \brief For LZMA_FILTER_LZMA1EXT: Uncompressed size (high bits) * * This holds the most significant 32 bits of the uncompressed size. */ uint32_t ext_size_high; /* * Reserved space to allow possible future extensions without * breaking the ABI. You should not touch these, because the names * of these variables may change. These are and will never be used * with the currently supported options, so it is safe to leave these * uninitialized. */ /** \private Reserved member. */ uint32_t reserved_int4; /** \private Reserved member. */ uint32_t reserved_int5; /** \private Reserved member. */ uint32_t reserved_int6; /** \private Reserved member. */ uint32_t reserved_int7; /** \private Reserved member. */ uint32_t reserved_int8; /** \private Reserved member. */ lzma_reserved_enum reserved_enum1; /** \private Reserved member. */ lzma_reserved_enum reserved_enum2; /** \private Reserved member. */ lzma_reserved_enum reserved_enum3; /** \private Reserved member. */ lzma_reserved_enum reserved_enum4; /** \private Reserved member.
*/ void *reserved_ptr1; /** \private Reserved member. */ void *reserved_ptr2; } lzma_options_lzma; /** * \brief Macro to set the 64-bit uncompressed size in ext_size_* * * This might be convenient when decoding using LZMA_FILTER_LZMA1EXT. * This isn't used with LZMA_FILTER_LZMA1 or LZMA_FILTER_LZMA2. */ #define lzma_set_ext_size(opt_lzma2, u64size) \ do { \ (opt_lzma2).ext_size_low = (uint32_t)(u64size); \ (opt_lzma2).ext_size_high = (uint32_t)((uint64_t)(u64size) >> 32); \ } while (0) /** * \brief Set a compression preset to lzma_options_lzma structure * * 0 is the fastest and 9 is the slowest. These match the switches -0 .. -9 * of the xz command line tool. In addition, it is possible to bitwise-or * flags to the preset. Currently only LZMA_PRESET_EXTREME is supported. * The flags are defined in container.h, because the flags are used also * with lzma_easy_encoder(). * * The preset levels are subject to changes between liblzma versions. * * This function is available only if LZMA1 or LZMA2 encoder has been enabled * when building liblzma. * * If features (like certain match finders) have been disabled at build time, * then the function may return success (false) even though the resulting * LZMA1/LZMA2 options may not be usable for encoder initialization * (LZMA_OPTIONS_ERROR). * * \param[out] options Pointer to LZMA1 or LZMA2 options to be filled * \param preset Preset level bitwise-ORed with preset flags * * \return lzma_bool: * - true if the preset is not supported (failure). * - false otherwise (success). */ extern LZMA_API(lzma_bool) lzma_lzma_preset( lzma_options_lzma *options, uint32_t preset) lzma_nothrow; diff --git a/contrib/xz/src/liblzma/api/lzma/version.h b/contrib/xz/src/liblzma/api/lzma/version.h index 7f9105235843..8739d751c704 100644 --- a/contrib/xz/src/liblzma/api/lzma/version.h +++ b/contrib/xz/src/liblzma/api/lzma/version.h @@ -1,135 +1,135 @@ /** * \file lzma/version.h * \brief Version number * \note Never include this file directly.
Use <lzma.h> instead. */ /* * Author: Lasse Collin * * This file has been put into the public domain. * You can do whatever you want with this file. */ #ifndef LZMA_H_INTERNAL # error Never include this file directly. Use instead. #endif /** \brief Major version number of the liblzma release. */ #define LZMA_VERSION_MAJOR 5 /** \brief Minor version number of the liblzma release. */ #define LZMA_VERSION_MINOR 4 /** \brief Patch version number of the liblzma release. */ -#define LZMA_VERSION_PATCH 3 +#define LZMA_VERSION_PATCH 4 /** * \brief Version stability marker * * This will always be one of three values: * - LZMA_VERSION_STABILITY_ALPHA * - LZMA_VERSION_STABILITY_BETA * - LZMA_VERSION_STABILITY_STABLE */ #define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_STABLE /** \brief Commit version number of the liblzma release */ #ifndef LZMA_VERSION_COMMIT # define LZMA_VERSION_COMMIT "" #endif /* * Map symbolic stability levels to integers. */ #define LZMA_VERSION_STABILITY_ALPHA 0 #define LZMA_VERSION_STABILITY_BETA 1 #define LZMA_VERSION_STABILITY_STABLE 2 /** * \brief Compile-time version number * * The version number is of format xyyyzzzs where * - x = major * - yyy = minor * - zzz = revision * - s indicates stability: 0 = alpha, 1 = beta, 2 = stable * * The same xyyyzzz triplet is never reused with different stability levels. * For example, if 5.1.0alpha has been released, there will never be 5.1.0beta * or 5.1.0 stable. * * \note The version number of liblzma has nothing to do with * the version number of Igor Pavlov's LZMA SDK.
*/ #define LZMA_VERSION (LZMA_VERSION_MAJOR * UINT32_C(10000000) \ + LZMA_VERSION_MINOR * UINT32_C(10000) \ + LZMA_VERSION_PATCH * UINT32_C(10) \ + LZMA_VERSION_STABILITY) /* * Macros to construct the compile-time version string */ #if LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_ALPHA # define LZMA_VERSION_STABILITY_STRING "alpha" #elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_BETA # define LZMA_VERSION_STABILITY_STRING "beta" #elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_STABLE # define LZMA_VERSION_STABILITY_STRING "" #else # error Incorrect LZMA_VERSION_STABILITY #endif #define LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) \ #major "." #minor "." #patch stability commit #define LZMA_VERSION_STRING_C(major, minor, patch, stability, commit) \ LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) /** * \brief Compile-time version as a string * * This can be for example "4.999.5alpha", "4.999.8beta", or "5.0.0" (stable * versions don't have any "stable" suffix). In future, a snapshot built * from source code repository may include an additional suffix, for example * "4.999.8beta-21-g1d92". The commit ID won't be available in numeric form * in LZMA_VERSION macro. */ #define LZMA_VERSION_STRING LZMA_VERSION_STRING_C( \ LZMA_VERSION_MAJOR, LZMA_VERSION_MINOR, \ LZMA_VERSION_PATCH, LZMA_VERSION_STABILITY_STRING, \ LZMA_VERSION_COMMIT) /* #ifndef is needed for use with windres (MinGW or Cygwin). */ #ifndef LZMA_H_INTERNAL_RC /** * \brief Run-time version number as an integer * * This allows an application to compare if it was built against the same, * older, or newer version of liblzma that is currently running. * * \return The value of LZMA_VERSION macro at the compile time of liblzma */ extern LZMA_API(uint32_t) lzma_version_number(void) lzma_nothrow lzma_attr_const; /** * \brief Run-time version as a string * * This function may be useful to display which version of liblzma an * application is currently using. 
* * \return Run-time version of liblzma */ extern LZMA_API(const char *) lzma_version_string(void) lzma_nothrow lzma_attr_const; #endif diff --git a/contrib/xz/src/liblzma/common/block_buffer_encoder.c b/contrib/xz/src/liblzma/common/block_buffer_encoder.c index a47342efd0f5..fdef02de8955 100644 --- a/contrib/xz/src/liblzma/common/block_buffer_encoder.c +++ b/contrib/xz/src/liblzma/common/block_buffer_encoder.c @@ -1,355 +1,355 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file block_buffer_encoder.c /// \brief Single-call .xz Block encoder // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "block_buffer_encoder.h" #include "block_encoder.h" #include "filter_encoder.h" #include "lzma2_encoder.h" #include "check.h" /// Estimate the maximum size of the Block Header and Check fields for /// a Block that uses LZMA2 uncompressed chunks. We could use /// lzma_block_header_size() but this is simpler. /// /// Block Header Size + Block Flags + Compressed Size /// + Uncompressed Size + Filter Flags for LZMA2 + CRC32 + Check /// and round up to the next multiple of four to take Header Padding /// into account. #define HEADERS_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 3 + 4 \ + LZMA_CHECK_SIZE_MAX + 3) & ~3) static uint64_t lzma2_bound(uint64_t uncompressed_size) { // Prevent integer overflow in overhead calculation. if (uncompressed_size > COMPRESSED_SIZE_MAX) return 0; // Calculate the exact overhead of the LZMA2 headers: Round // uncompressed_size up to the next multiple of LZMA2_CHUNK_MAX, // multiply by the size of per-chunk header, and add one byte for // the end marker. const uint64_t overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1) / LZMA2_CHUNK_MAX) * LZMA2_HEADER_UNCOMPRESSED + 1; // Catch the possible integer overflow. 
if (COMPRESSED_SIZE_MAX - overhead < uncompressed_size) return 0; return uncompressed_size + overhead; } extern uint64_t lzma_block_buffer_bound64(uint64_t uncompressed_size) { // If the data doesn't compress, we always use uncompressed // LZMA2 chunks. uint64_t lzma2_size = lzma2_bound(uncompressed_size); if (lzma2_size == 0) return 0; // Take Block Padding into account. lzma2_size = (lzma2_size + 3) & ~UINT64_C(3); // No risk of integer overflow because lzma2_bound() already takes // into account the size of the headers in the Block. return HEADERS_BOUND + lzma2_size; } extern LZMA_API(size_t) lzma_block_buffer_bound(size_t uncompressed_size) { uint64_t ret = lzma_block_buffer_bound64(uncompressed_size); #if SIZE_MAX < UINT64_MAX // Catch the possible integer overflow on 32-bit systems. if (ret > SIZE_MAX) return 0; #endif return ret; } static lzma_ret block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { // Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at // all, but LZMA2 always requires a dictionary, so use the minimum // value to minimize memory usage of the decoder. lzma_options_lzma lzma2 = { .dict_size = LZMA_DICT_SIZE_MIN, }; lzma_filter filters[2]; filters[0].id = LZMA_FILTER_LZMA2; filters[0].options = &lzma2; filters[1].id = LZMA_VLI_UNKNOWN; // Set the above filter options to *block temporarily so that we can // encode the Block Header. lzma_filter *filters_orig = block->filters; block->filters = filters; if (lzma_block_header_size(block) != LZMA_OK) { block->filters = filters_orig; return LZMA_PROG_ERROR; } // Check that there's enough output space. The caller has already // set block->compressed_size to what lzma2_bound() has returned, // so we can reuse that value. We know that compressed_size is a // known valid VLI and header_size is a small value so their sum // will never overflow. 
assert(block->compressed_size == lzma2_bound(in_size)); if (out_size - *out_pos < block->header_size + block->compressed_size) { block->filters = filters_orig; return LZMA_BUF_ERROR; } if (lzma_block_header_encode(block, out + *out_pos) != LZMA_OK) { block->filters = filters_orig; return LZMA_PROG_ERROR; } block->filters = filters_orig; *out_pos += block->header_size; // Encode the data using LZMA2 uncompressed chunks. size_t in_pos = 0; uint8_t control = 0x01; // Dictionary reset while (in_pos < in_size) { // Control byte: Indicate uncompressed chunk, of which // the first resets the dictionary. out[(*out_pos)++] = control; control = 0x02; // No dictionary reset // Size of the uncompressed chunk const size_t copy_size = my_min(in_size - in_pos, LZMA2_CHUNK_MAX); out[(*out_pos)++] = (copy_size - 1) >> 8; out[(*out_pos)++] = (copy_size - 1) & 0xFF; // The actual data assert(*out_pos + copy_size <= out_size); memcpy(out + *out_pos, in + in_pos, copy_size); in_pos += copy_size; *out_pos += copy_size; } // End marker out[(*out_pos)++] = 0x00; assert(*out_pos <= out_size); return LZMA_OK; } static lzma_ret block_encode_normal(lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { // Find out the size of the Block Header. return_if_error(lzma_block_header_size(block)); // Reserve space for the Block Header and skip it for now. if (out_size - *out_pos <= block->header_size) return LZMA_BUF_ERROR; const size_t out_start = *out_pos; *out_pos += block->header_size; // Limit out_size so that we stop encoding if the output would grow // bigger than what uncompressed Block would be. if (out_size - *out_pos > block->compressed_size) out_size = *out_pos + block->compressed_size; // TODO: In many common cases this could be optimized to use // significantly less memory. 
lzma_next_coder raw_encoder = LZMA_NEXT_CODER_INIT; lzma_ret ret = lzma_raw_encoder_init( &raw_encoder, allocator, block->filters); if (ret == LZMA_OK) { size_t in_pos = 0; ret = raw_encoder.code(raw_encoder.coder, allocator, in, &in_pos, in_size, out, out_pos, out_size, LZMA_FINISH); } // NOTE: This needs to be run even if lzma_raw_encoder_init() failed. lzma_next_end(&raw_encoder, allocator); if (ret == LZMA_STREAM_END) { // Compression was successful. Write the Block Header. block->compressed_size = *out_pos - (out_start + block->header_size); ret = lzma_block_header_encode(block, out + out_start); if (ret != LZMA_OK) ret = LZMA_PROG_ERROR; } else if (ret == LZMA_OK) { // Output buffer became full. ret = LZMA_BUF_ERROR; } // Reset *out_pos if something went wrong. if (ret != LZMA_OK) *out_pos = out_start; return ret; } static lzma_ret block_buffer_encode(lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size, bool try_to_compress) { // Validate the arguments. if (block == NULL || (in == NULL && in_size != 0) || out == NULL || out_pos == NULL || *out_pos > out_size) return LZMA_PROG_ERROR; // The contents of the structure may depend on the version so // check the version before validating the contents of *block. if (block->version > 1) return LZMA_OPTIONS_ERROR; if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX || (try_to_compress && block->filters == NULL)) return LZMA_PROG_ERROR; if (!lzma_check_is_supported(block->check)) return LZMA_UNSUPPORTED_CHECK; // Size of a Block has to be a multiple of four, so limit the size // here already. This way we don't need to check it again when adding // Block Padding. out_size -= (out_size - *out_pos) & 3; // Get the size of the Check field. const size_t check_size = lzma_check_size(block->check); assert(check_size != UINT32_MAX); // Reserve space for the Check field. 
if (out_size - *out_pos <= check_size) return LZMA_BUF_ERROR; out_size -= check_size; // Initialize block->uncompressed_size and calculate the worst-case // value for block->compressed_size. block->uncompressed_size = in_size; block->compressed_size = lzma2_bound(in_size); if (block->compressed_size == 0) return LZMA_DATA_ERROR; // Do the actual compression. lzma_ret ret = LZMA_BUF_ERROR; if (try_to_compress) ret = block_encode_normal(block, allocator, in, in_size, out, out_pos, out_size); if (ret != LZMA_OK) { // If the error was something else than output buffer // becoming full, return the error now. if (ret != LZMA_BUF_ERROR) return ret; - // The data was uncompressible (at least with the options + // The data was incompressible (at least with the options // given to us) or the output buffer was too small. Use the // uncompressed chunks of LZMA2 to wrap the data into a valid // Block. If we haven't been given enough output space, even // this may fail. return_if_error(block_encode_uncompressed(block, in, in_size, out, out_pos, out_size)); } assert(*out_pos <= out_size); // Block Padding. No buffer overflow here, because we already adjusted // out_size so that (out_size - out_start) is a multiple of four. // Thus, if the buffer is full, the loop body can never run. for (size_t i = (size_t)(block->compressed_size); i & 3; ++i) { assert(*out_pos < out_size); out[(*out_pos)++] = 0x00; } // If there's no Check field, we are done now. if (check_size > 0) { // Calculate the integrity check. We reserved space for // the Check field earlier so we don't need to check for // available output space here. 
lzma_check_state check; lzma_check_init(&check, block->check); lzma_check_update(&check, block->check, in, in_size); lzma_check_finish(&check, block->check); memcpy(block->raw_check, check.buffer.u8, check_size); memcpy(out + *out_pos, check.buffer.u8, check_size); *out_pos += check_size; } return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { return block_buffer_encode(block, allocator, in, in_size, out, out_pos, out_size, true); } #ifdef HAVE_SYMBOL_VERSIONS_LINUX // This is for compatibility with binaries linked against liblzma that // has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7. LZMA_SYMVER_API("lzma_block_uncomp_encode@XZ_5.2.2", lzma_ret, lzma_block_uncomp_encode_522)(lzma_block *block, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow lzma_attr_warn_unused_result __attribute__((__alias__("lzma_block_uncomp_encode_52"))); LZMA_SYMVER_API("lzma_block_uncomp_encode@@XZ_5.2", lzma_ret, lzma_block_uncomp_encode_52)(lzma_block *block, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow lzma_attr_warn_unused_result; #define lzma_block_uncomp_encode lzma_block_uncomp_encode_52 #endif extern LZMA_API(lzma_ret) lzma_block_uncomp_encode(lzma_block *block, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { // It won't allocate any memory from heap so no need // for lzma_allocator. 
return block_buffer_encode(block, NULL, in, in_size, out, out_pos, out_size, false); } diff --git a/contrib/xz/src/liblzma/common/common.h b/contrib/xz/src/liblzma/common/common.h index 11fec52c5970..4d9cab53cdbb 100644 --- a/contrib/xz/src/liblzma/common/common.h +++ b/contrib/xz/src/liblzma/common/common.h @@ -1,385 +1,385 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file common.h /// \brief Definitions common to the whole liblzma library // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_COMMON_H #define LZMA_COMMON_H #include "sysdefs.h" #include "mythread.h" #include "tuklib_integer.h" #if defined(_WIN32) || defined(__CYGWIN__) # ifdef DLL_EXPORT # define LZMA_API_EXPORT __declspec(dllexport) # else # define LZMA_API_EXPORT # endif // Don't use ifdef or defined() below. #elif HAVE_VISIBILITY # define LZMA_API_EXPORT __attribute__((__visibility__("default"))) #else # define LZMA_API_EXPORT #endif #define LZMA_API(type) LZMA_API_EXPORT type LZMA_API_CALL #include "lzma.h" // This is for detecting modern GCC and Clang attributes // like __symver__ in GCC >= 10. #ifdef __has_attribute # define lzma_has_attribute(attr) __has_attribute(attr) #else # define lzma_has_attribute(attr) 0 #endif // The extra symbol versioning in the C files may only be used when // building a shared library. If HAVE_SYMBOL_VERSIONS_LINUX is defined // to 2 then symbol versioning is done only if also PIC is defined. // By default Libtool defines PIC when building a shared library and // doesn't define it when building a static library but it can be -// overriden with --with-pic and --without-pic. configure let's rely +// overridden with --with-pic and --without-pic. configure let's rely // on PIC if neither --with-pic or --without-pic was used. 
#if defined(HAVE_SYMBOL_VERSIONS_LINUX) \ && (HAVE_SYMBOL_VERSIONS_LINUX == 2 && !defined(PIC)) # undef HAVE_SYMBOL_VERSIONS_LINUX #endif #ifdef HAVE_SYMBOL_VERSIONS_LINUX // To keep link-time optimization (LTO, -flto) working with GCC, // the __symver__ attribute must be used instead of __asm__(".symver ..."). // Otherwise the symbol versions may be lost, resulting in broken liblzma // that has wrong default versions in the exported symbol list! // The attribute was added in GCC 10; LTO with older GCC is not supported. // // To keep -Wmissing-prototypes happy, use LZMA_SYMVER_API only with function // declarations (including those with __alias__ attribute) and LZMA_API with // the function definitions. This means a little bit of silly copy-and-paste // between declarations and definitions though. // // As of GCC 12.2, the __symver__ attribute supports only @ and @@ but the // very convenient @@@ isn't supported (it's supported by GNU assembler // since 2000). When using @@ instead of @@@, the internal name must not be // the same as the external name to avoid problems in some situations. This // is why "#define foo_52 foo" is needed for the default symbol versions. // // __has_attribute is supported before GCC 10 and it is supported in Clang 14 // too (which doesn't support __symver__) so use it to detect if __symver__ // is available. This should be far more reliable than looking at compiler // version macros as nowadays especially __GNUC__ is defined by many compilers. # if lzma_has_attribute(__symver__) # define LZMA_SYMVER_API(extnamever, type, intname) \ extern __attribute__((__symver__(extnamever))) \ LZMA_API(type) intname # else # define LZMA_SYMVER_API(extnamever, type, intname) \ __asm__(".symver " #intname "," extnamever); \ extern LZMA_API(type) intname # endif #endif // These allow helping the compiler in some often-executed branches, whose // result is almost always the same. 
#ifdef __GNUC__ # define likely(expr) __builtin_expect(expr, true) # define unlikely(expr) __builtin_expect(expr, false) #else # define likely(expr) (expr) # define unlikely(expr) (expr) #endif /// Size of temporary buffers needed in some filters #define LZMA_BUFFER_SIZE 4096 /// Maximum number of worker threads within one multithreaded component. /// The limit exists solely to make it simpler to prevent integer overflows /// when allocating structures etc. This should be big enough for now... /// the code won't scale anywhere close to this number anyway. #define LZMA_THREADS_MAX 16384 /// Starting value for memory usage estimates. Instead of calculating size /// of _every_ structure and taking into account malloc() overhead etc., we /// add a base size to all memory usage estimates. It's not very accurate /// but should be easily good enough. #define LZMA_MEMUSAGE_BASE (UINT64_C(1) << 15) /// Start of internal Filter ID space. These IDs must never be used /// in Streams. #define LZMA_FILTER_RESERVED_START (LZMA_VLI_C(1) << 62) /// Supported flags that can be passed to lzma_stream_decoder(), /// lzma_auto_decoder(), or lzma_stream_decoder_mt(). #define LZMA_SUPPORTED_FLAGS \ ( LZMA_TELL_NO_CHECK \ | LZMA_TELL_UNSUPPORTED_CHECK \ | LZMA_TELL_ANY_CHECK \ | LZMA_IGNORE_CHECK \ | LZMA_CONCATENATED \ | LZMA_FAIL_FAST ) /// Largest valid lzma_action value as unsigned integer. #define LZMA_ACTION_MAX ((unsigned int)(LZMA_FULL_BARRIER)) /// Special return value (lzma_ret) to indicate that a timeout was reached /// and lzma_code() must not return LZMA_BUF_ERROR. This is converted to /// LZMA_OK in lzma_code(). #define LZMA_TIMED_OUT LZMA_RET_INTERNAL1 /// Special return value (lzma_ret) for use in stream_decoder_mt.c to /// indicate Index was detected instead of a Block Header. 
#define LZMA_INDEX_DETECTED LZMA_RET_INTERNAL2 typedef struct lzma_next_coder_s lzma_next_coder; typedef struct lzma_filter_info_s lzma_filter_info; /// Type of a function used to initialize a filter encoder or decoder typedef lzma_ret (*lzma_init_function)( lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters); /// Type of a function to do some kind of coding work (filters, Stream, /// Block encoders/decoders etc.). Some special coders use don't use both /// input and output buffers, but for simplicity they still use this same /// function prototype. typedef lzma_ret (*lzma_code_function)( void *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action); /// Type of a function to free the memory allocated for the coder typedef void (*lzma_end_function)( void *coder, const lzma_allocator *allocator); /// Raw coder validates and converts an array of lzma_filter structures to /// an array of lzma_filter_info structures. This array is used with /// lzma_next_filter_init to initialize the filter chain. struct lzma_filter_info_s { /// Filter ID. This can be used to share the same initiazation /// function *and* data structures with different Filter IDs /// (LZMA_FILTER_LZMA1EXT does it), and also by the encoder /// with lzma_filters_update() if filter chain is updated /// in the middle of a raw stream or Block (LZMA_SYNC_FLUSH). lzma_vli id; /// Pointer to function used to initialize the filter. /// This is NULL to indicate end of array. lzma_init_function init; /// Pointer to filter's options structure void *options; }; /// Hold data and function pointers of the next filter in the chain. struct lzma_next_coder_s { /// Pointer to coder-specific data void *coder; /// Filter ID. This is LZMA_VLI_UNKNOWN when this structure doesn't /// point to a filter coder. lzma_vli id; /// "Pointer" to init function. 
This is never called here. /// We need only to detect if we are initializing a coder /// that was allocated earlier. See lzma_next_coder_init and /// lzma_next_strm_init macros in this file. uintptr_t init; /// Pointer to function to do the actual coding lzma_code_function code; /// Pointer to function to free lzma_next_coder.coder. This can /// be NULL; in that case, lzma_free is called to free /// lzma_next_coder.coder. lzma_end_function end; /// Pointer to a function to get progress information. If this is NULL, /// lzma_stream.total_in and .total_out are used instead. void (*get_progress)(void *coder, uint64_t *progress_in, uint64_t *progress_out); /// Pointer to function to return the type of the integrity check. /// Most coders won't support this. lzma_check (*get_check)(const void *coder); /// Pointer to function to get and/or change the memory usage limit. /// If new_memlimit == 0, the limit is not changed. lzma_ret (*memconfig)(void *coder, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit); /// Update the filter-specific options or the whole filter chain /// in the encoder. lzma_ret (*update)(void *coder, const lzma_allocator *allocator, const lzma_filter *filters, const lzma_filter *reversed_filters); /// Set how many bytes of output this coder may produce at maximum. /// On success LZMA_OK must be returned. /// If the filter chain as a whole cannot support this feature, /// this must return LZMA_OPTIONS_ERROR. /// If no input has been given to the coder and the requested limit /// is too small, this must return LZMA_BUF_ERROR. If input has been /// seen, LZMA_OK is allowed too. 
lzma_ret (*set_out_limit)(void *coder, uint64_t *uncomp_size, uint64_t out_limit); }; /// Macro to initialize lzma_next_coder structure #define LZMA_NEXT_CODER_INIT \ (lzma_next_coder){ \ .coder = NULL, \ .init = (uintptr_t)(NULL), \ .id = LZMA_VLI_UNKNOWN, \ .code = NULL, \ .end = NULL, \ .get_progress = NULL, \ .get_check = NULL, \ .memconfig = NULL, \ .update = NULL, \ .set_out_limit = NULL, \ } /// Internal data for lzma_strm_init, lzma_code, and lzma_end. A pointer to /// this is stored in lzma_stream. struct lzma_internal_s { /// The actual coder that should do something useful lzma_next_coder next; /// Track the state of the coder. This is used to validate arguments /// so that the actual coders can rely on e.g. that LZMA_SYNC_FLUSH /// is used on every call to lzma_code until next.code has returned /// LZMA_STREAM_END. enum { ISEQ_RUN, ISEQ_SYNC_FLUSH, ISEQ_FULL_FLUSH, ISEQ_FINISH, ISEQ_FULL_BARRIER, ISEQ_END, ISEQ_ERROR, } sequence; /// A copy of lzma_stream avail_in. This is used to verify that the /// amount of input doesn't change once e.g. LZMA_FINISH has been /// used. size_t avail_in; /// Indicates which lzma_action values are allowed by next.code. bool supported_actions[LZMA_ACTION_MAX + 1]; /// If true, lzma_code will return LZMA_BUF_ERROR if no progress was /// made (no input consumed and no output produced by next.code). bool allow_buf_error; }; /// Allocates memory extern void *lzma_alloc(size_t size, const lzma_allocator *allocator) lzma_attribute((__malloc__)) lzma_attr_alloc_size(1); /// Allocates memory and zeroes it (like calloc()). This can be faster /// than lzma_alloc() + memzero() while being backward compatible with /// custom allocators. 
extern void * lzma_attribute((__malloc__)) lzma_attr_alloc_size(1) lzma_alloc_zero(size_t size, const lzma_allocator *allocator); /// Frees memory extern void lzma_free(void *ptr, const lzma_allocator *allocator); /// Allocates strm->internal if it is NULL, and initializes *strm and /// strm->internal. This function is only called via lzma_next_strm_init macro. extern lzma_ret lzma_strm_init(lzma_stream *strm); /// Initializes the next filter in the chain, if any. This takes care of /// freeing the memory of previously initialized filter if it is different /// than the filter being initialized now. This way the actual filter /// initialization functions don't need to use lzma_next_coder_init macro. extern lzma_ret lzma_next_filter_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters); /// Update the next filter in the chain, if any. This checks that /// the application is not trying to change the Filter IDs. extern lzma_ret lzma_next_filter_update( lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *reversed_filters); /// Frees the memory allocated for next->coder either using next->end or, /// if next->end is NULL, using lzma_free. extern void lzma_next_end(lzma_next_coder *next, const lzma_allocator *allocator); /// Copy as much data as possible from in[] to out[] and update *in_pos /// and *out_pos accordingly. Returns the number of bytes copied. extern size_t lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size); /// \brief Return if expression doesn't evaluate to LZMA_OK /// /// There are several situations where we want to return immediately /// with the value of expr if it isn't LZMA_OK. This macro shortens /// the code a little. 
#define return_if_error(expr) \ do { \ const lzma_ret ret_ = (expr); \ if (ret_ != LZMA_OK) \ return ret_; \ } while (0) /// If next isn't already initialized, free the previous coder. Then mark /// that next is _possibly_ initialized for the coder using this macro. /// "Possibly" means that if e.g. allocation of next->coder fails, the /// structure isn't actually initialized for this coder, but leaving /// next->init to func is still OK. #define lzma_next_coder_init(func, next, allocator) \ do { \ if ((uintptr_t)(func) != (next)->init) \ lzma_next_end(next, allocator); \ (next)->init = (uintptr_t)(func); \ } while (0) /// Initializes lzma_strm and calls func() to initialize strm->internal->next. /// (The function being called will use lzma_next_coder_init()). If /// initialization fails, memory that wasn't freed by func() is freed /// along strm->internal. #define lzma_next_strm_init(func, strm, ...) \ do { \ return_if_error(lzma_strm_init(strm)); \ const lzma_ret ret_ = func(&(strm)->internal->next, \ (strm)->allocator, __VA_ARGS__); \ if (ret_ != LZMA_OK) { \ lzma_end(strm); \ return ret_; \ } \ } while (0) #endif diff --git a/contrib/xz/src/liblzma/common/file_info.c b/contrib/xz/src/liblzma/common/file_info.c index a6b7e145ae04..799bb024fe1f 100644 --- a/contrib/xz/src/liblzma/common/file_info.c +++ b/contrib/xz/src/liblzma/common/file_info.c @@ -1,855 +1,855 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file file_info.c /// \brief Decode .xz file information into a lzma_index structure // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. 
// /////////////////////////////////////////////////////////////////////////////// #include "index_decoder.h" typedef struct { enum { SEQ_MAGIC_BYTES, SEQ_PADDING_SEEK, SEQ_PADDING_DECODE, SEQ_FOOTER, SEQ_INDEX_INIT, SEQ_INDEX_DECODE, SEQ_HEADER_DECODE, SEQ_HEADER_COMPARE, } sequence; /// Absolute position of in[*in_pos] in the file. All code that /// modifies *in_pos also updates this. seek_to_pos() needs this /// to determine if we need to request the application to seek for /// us or if we can do the seeking internally by adjusting *in_pos. uint64_t file_cur_pos; /// This refers to absolute positions of interesting parts of the /// input file. Sometimes it points to the *beginning* of a specific /// field and sometimes to the *end* of a field. The current target /// position at each moment is explained in the comments. uint64_t file_target_pos; /// Size of the .xz file (from the application). uint64_t file_size; /// Index decoder lzma_next_coder index_decoder; /// Number of bytes remaining in the Index field that is currently /// being decoded. lzma_vli index_remaining; /// The Index decoder will store the decoded Index in this pointer. lzma_index *this_index; /// Amount of Stream Padding in the current Stream. lzma_vli stream_padding; /// The final combined index is collected here. lzma_index *combined_index; /// Pointer from the application where to store the index information /// after successful decoding. lzma_index **dest_index; /// Pointer to lzma_stream.seek_pos to be used when returning /// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed. uint64_t *external_seek_pos; /// Memory usage limit uint64_t memlimit; /// Stream Flags from the very beginning of the file. lzma_stream_flags first_header_flags; /// Stream Flags from Stream Header of the current Stream. lzma_stream_flags header_flags; /// Stream Flags from Stream Footer of the current Stream. 
lzma_stream_flags footer_flags; size_t temp_pos; size_t temp_size; uint8_t temp[8192]; } lzma_file_info_coder; /// Copies data from in[*in_pos] into coder->temp until /// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos /// in sync with *in_pos. Returns true if more input is needed. static bool fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size) { coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size, coder->temp, &coder->temp_pos, coder->temp_size); return coder->temp_pos < coder->temp_size; } /// Seeks to the absolute file position specified by target_pos. /// This tries to do the seeking by only modifying *in_pos, if possible. /// The main benefit of this is that if one passes the whole file at once /// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED /// as all the seeking can be done by adjusting *in_pos in this function. /// /// Returns true if an external seek is needed and the caller must return /// LZMA_SEEK_NEEDED. static bool seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos, size_t in_start, size_t *in_pos, size_t in_size) { // The input buffer doesn't extend beyond the end of the file. // This has been checked by file_info_decode() already. assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos); const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start); const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos); bool external_seek_needed; if (target_pos >= pos_min && target_pos <= pos_max) { // The requested position is available in the current input // buffer or right after it. That is, in a corner case we // end up setting *in_pos == in_size and thus will immediately // need new input bytes from the application. *in_pos += (size_t)(target_pos - coder->file_cur_pos); external_seek_needed = false; } else { // Ask the application to seek the input file. 
*coder->external_seek_pos = target_pos; external_seek_needed = true; // Mark the whole input buffer as used. This way // lzma_stream.total_in will have a better estimate // of the amount of data read. It still won't be perfect // as the value will depend on the input buffer size that // the application uses, but it should be good enough for // those few who want an estimate. *in_pos = in_size; } // After seeking (internal or external) the current position // will match the requested target position. coder->file_cur_pos = target_pos; return external_seek_needed; } /// The caller sets coder->file_target_pos so that it points to the *end* /// of the desired file position. This function then determines how far /// backwards from that position we can seek. After seeking fill_temp() /// can be used to read data into coder->temp. When fill_temp() has finished, /// coder->temp[coder->temp_size] will match coder->file_target_pos. /// /// This also validates that coder->target_file_pos is sane in sense that /// we aren't trying to seek too far backwards (too close or beyond the /// beginning of the file). static lzma_ret reverse_seek(lzma_file_info_coder *coder, size_t in_start, size_t *in_pos, size_t in_size) { // Check that there is enough data before the target position // to contain at least Stream Header and Stream Footer. If there // isn't, the file cannot be valid. if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE) return LZMA_DATA_ERROR; coder->temp_pos = 0; // The Stream Header at the very beginning of the file gets handled // specially in SEQ_MAGIC_BYTES and thus we will never need to seek // there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes // we avoid a useless external seek after SEQ_MAGIC_BYTES if the // application uses an extremely small input buffer and the input // file is very small. 
if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE < sizeof(coder->temp)) coder->temp_size = (size_t)(coder->file_target_pos - LZMA_STREAM_HEADER_SIZE); else coder->temp_size = sizeof(coder->temp); // The above if-statements guarantee this. This is important because // the Stream Header/Footer decoders assume that there's at least // LZMA_STREAM_HEADER_SIZE bytes in coder->temp. assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE); if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size, in_start, in_pos, in_size)) return LZMA_SEEK_NEEDED; return LZMA_OK; } /// Gets the number of zero-bytes at the end of the buffer. static size_t get_padding_size(const uint8_t *buf, size_t buf_size) { size_t padding = 0; while (buf_size > 0 && buf[--buf_size] == 0x00) ++padding; return padding; } /// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR /// is used to tell the application that Magic Bytes didn't match. In other /// Stream Header/Footer fields (in the middle/end of the file) it could be /// a bit confusing to return LZMA_FORMAT_ERROR as we already know that there /// is a valid Stream Header at the beginning of the file. For those cases /// this function is used to convert LZMA_FORMAT_ERROR to LZMA_DATA_ERROR. static lzma_ret hide_format_error(lzma_ret ret) { if (ret == LZMA_FORMAT_ERROR) ret = LZMA_DATA_ERROR; return ret; } /// Calls the Index decoder and updates coder->index_remaining. /// This is a separate function because the input can be either directly /// from the application or from coder->temp. 
static lzma_ret decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, bool update_file_cur_pos) { const size_t in_start = *in_pos; const lzma_ret ret = coder->index_decoder.code( coder->index_decoder.coder, allocator, in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); coder->index_remaining -= *in_pos - in_start; if (update_file_cur_pos) coder->file_cur_pos += *in_pos - in_start; return ret; } static lzma_ret file_info_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out lzma_attribute((__unused__)), size_t *restrict out_pos lzma_attribute((__unused__)), size_t out_size lzma_attribute((__unused__)), lzma_action action lzma_attribute((__unused__))) { lzma_file_info_coder *coder = coder_ptr; const size_t in_start = *in_pos; // If the caller provides input past the end of the file, trim // the extra bytes from the buffer so that we won't read too far. assert(coder->file_size >= coder->file_cur_pos); if (coder->file_size - coder->file_cur_pos < in_size - in_start) in_size = in_start + (size_t)(coder->file_size - coder->file_cur_pos); while (true) switch (coder->sequence) { case SEQ_MAGIC_BYTES: // Decode the Stream Header at the beginning of the file // first to check if the Magic Bytes match. The flags // are stored in coder->first_header_flags so that we // don't need to seek to it again. // // Check that the file is big enough to contain at least // Stream Header. if (coder->file_size < LZMA_STREAM_HEADER_SIZE) return LZMA_FORMAT_ERROR; // Read the Stream Header field into coder->temp. if (fill_temp(coder, in, in_pos, in_size)) return LZMA_OK; // This is the only Stream Header/Footer decoding where we // want to return LZMA_FORMAT_ERROR if the Magic Bytes don't // match. Elsewhere it will be converted to LZMA_DATA_ERROR. 
return_if_error(lzma_stream_header_decode( &coder->first_header_flags, coder->temp)); // Now that we know that the Magic Bytes match, check the // file size. It's better to do this here after checking the // Magic Bytes since this way we can give LZMA_FORMAT_ERROR // instead of LZMA_DATA_ERROR when the Magic Bytes don't // match in a file that is too big or isn't a multiple of // four bytes. if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3)) return LZMA_DATA_ERROR; // Start looking for Stream Padding and Stream Footer // at the end of the file. coder->file_target_pos = coder->file_size; // Fall through case SEQ_PADDING_SEEK: coder->sequence = SEQ_PADDING_DECODE; return_if_error(reverse_seek( coder, in_start, in_pos, in_size)); // Fall through case SEQ_PADDING_DECODE: { // Copy to coder->temp first. This keeps the code simpler if // the application only provides input a few bytes at a time. if (fill_temp(coder, in, in_pos, in_size)) return LZMA_OK; // Scan the buffer backwards to get the size of the // Stream Padding field (if any). const size_t new_padding = get_padding_size( coder->temp, coder->temp_size); coder->stream_padding += new_padding; // Set the target position to the beginning of Stream Padding // that has been observed so far. If all Stream Padding has // been seen, then the target position will be at the end // of the Stream Footer field. coder->file_target_pos -= new_padding; if (new_padding == coder->temp_size) { // The whole buffer was padding. Seek backwards in // the file to get more input. coder->sequence = SEQ_PADDING_SEEK; break; } // Size of Stream Padding must be a multiple of 4 bytes. if (coder->stream_padding & 3) return LZMA_DATA_ERROR; coder->sequence = SEQ_FOOTER; // Calculate the amount of non-padding data in coder->temp. coder->temp_size -= new_padding; coder->temp_pos = coder->temp_size; // We can avoid an external seek if the whole Stream Footer // is already in coder->temp. 
In that case SEQ_FOOTER won't // read more input and will find the Stream Footer from // coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE]. // // Otherwise we will need to seek. The seeking is done so - // that Stream Footer wil be at the end of coder->temp. + // that Stream Footer will be at the end of coder->temp. // This way it's likely that we also get a complete Index // field into coder->temp without needing a separate seek // for that (unless the Index field is big). if (coder->temp_size < LZMA_STREAM_HEADER_SIZE) return_if_error(reverse_seek( coder, in_start, in_pos, in_size)); } // Fall through case SEQ_FOOTER: // Copy the Stream Footer field into coder->temp. // If Stream Footer was already available in coder->temp // in SEQ_PADDING_DECODE, then this does nothing. if (fill_temp(coder, in, in_pos, in_size)) return LZMA_OK; // Make coder->file_target_pos and coder->temp_size point // to the beginning of Stream Footer and thus to the end // of the Index field. coder->temp_pos will be updated // a bit later. coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE; coder->temp_size -= LZMA_STREAM_HEADER_SIZE; // Decode Stream Footer. return_if_error(hide_format_error(lzma_stream_footer_decode( &coder->footer_flags, coder->temp + coder->temp_size))); // Check that we won't seek past the beginning of the file. // // LZMA_STREAM_HEADER_SIZE is added because there must be // space for Stream Header too even though we won't seek // there before decoding the Index field. // // There's no risk of integer overflow here because // Backward Size cannot be greater than 2^34. if (coder->file_target_pos < coder->footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE) return LZMA_DATA_ERROR; // Set the target position to the beginning of the Index field. coder->file_target_pos -= coder->footer_flags.backward_size; coder->sequence = SEQ_INDEX_INIT; // We can avoid an external seek if the whole Index field is // already available in coder->temp. 
if (coder->temp_size >= coder->footer_flags.backward_size) { // Set coder->temp_pos to point to the beginning // of the Index. coder->temp_pos = coder->temp_size - coder->footer_flags.backward_size; } else { // These are set to zero to indicate that there's no // useful data (Index or anything else) in coder->temp. coder->temp_pos = 0; coder->temp_size = 0; // Seek to the beginning of the Index field. if (seek_to_pos(coder, coder->file_target_pos, in_start, in_pos, in_size)) return LZMA_SEEK_NEEDED; } // Fall through case SEQ_INDEX_INIT: { // Calculate the amount of memory already used by the earlier // Indexes so that we know how big memory limit to pass to // the Index decoder. // // NOTE: When there are multiple Streams, the separate // lzma_index structures can use more RAM (as measured by // lzma_index_memused()) than the final combined lzma_index. // Thus memlimit may need to be slightly higher than the final // calculated memory usage will be. This is perhaps a bit // confusing to the application, but I think it shouldn't // cause problems in practice. uint64_t memused = 0; if (coder->combined_index != NULL) { memused = lzma_index_memused(coder->combined_index); assert(memused <= coder->memlimit); if (memused > coder->memlimit) // Extra sanity check return LZMA_PROG_ERROR; } // Initialize the Index decoder. return_if_error(lzma_index_decoder_init( &coder->index_decoder, allocator, &coder->this_index, coder->memlimit - memused)); coder->index_remaining = coder->footer_flags.backward_size; coder->sequence = SEQ_INDEX_DECODE; } // Fall through case SEQ_INDEX_DECODE: { // Decode (a part of) the Index. If the whole Index is already // in coder->temp, read it from there. Otherwise read from // in[*in_pos] onwards. Note that index_decode() updates // coder->index_remaining and optionally coder->file_cur_pos. 
lzma_ret ret; if (coder->temp_size != 0) { assert(coder->temp_size - coder->temp_pos == coder->index_remaining); ret = decode_index(coder, allocator, coder->temp, &coder->temp_pos, coder->temp_size, false); } else { // Don't give the decoder more input than the known // remaining size of the Index field. size_t in_stop = in_size; if (in_size - *in_pos > coder->index_remaining) in_stop = *in_pos + (size_t)(coder->index_remaining); ret = decode_index(coder, allocator, in, in_pos, in_stop, true); } switch (ret) { case LZMA_OK: // If the Index docoder asks for more input when we // have already given it as much input as Backward Size // indicated, the file is invalid. if (coder->index_remaining == 0) return LZMA_DATA_ERROR; // We cannot get here if we were reading Index from // coder->temp because when reading from coder->temp // we give the Index decoder exactly // coder->index_remaining bytes of input. assert(coder->temp_size == 0); return LZMA_OK; case LZMA_STREAM_END: // If the decoding seems to be successful, check also // that the Index decoder consumed as much input as // indicated by the Backward Size field. if (coder->index_remaining != 0) return LZMA_DATA_ERROR; break; default: return ret; } // Calculate how much the Index tells us to seek backwards // (relative to the beginning of the Index): Total size of // all Blocks plus the size of the Stream Header field. // No integer overflow here because lzma_index_total_size() // cannot return a value greater than LZMA_VLI_MAX. const uint64_t seek_amount = lzma_index_total_size(coder->this_index) + LZMA_STREAM_HEADER_SIZE; // Check that Index is sane in sense that seek_amount won't // make us seek past the beginning of the file when locating // the Stream Header. // // coder->file_target_pos still points to the beginning of // the Index field. if (coder->file_target_pos < seek_amount) return LZMA_DATA_ERROR; // Set the target to the beginning of Stream Header. 
coder->file_target_pos -= seek_amount; if (coder->file_target_pos == 0) { // We would seek to the beginning of the file, but // since we already decoded that Stream Header in // SEQ_MAGIC_BYTES, we can use the cached value from // coder->first_header_flags to avoid the seek. coder->header_flags = coder->first_header_flags; coder->sequence = SEQ_HEADER_COMPARE; break; } coder->sequence = SEQ_HEADER_DECODE; // Make coder->file_target_pos point to the end of // the Stream Header field. coder->file_target_pos += LZMA_STREAM_HEADER_SIZE; // If coder->temp_size is non-zero, it points to the end // of the Index field. Then the beginning of the Index // field is at coder->temp[coder->temp_size // - coder->footer_flags.backward_size]. assert(coder->temp_size == 0 || coder->temp_size >= coder->footer_flags.backward_size); // If coder->temp contained the whole Index, see if it has // enough data to contain also the Stream Header. If so, // we avoid an external seek. // // NOTE: This can happen only with small .xz files and only // for the non-first Stream as the Stream Flags of the first // Stream are cached and already handled a few lines above. // So this isn't as useful as the other seek-avoidance cases. if (coder->temp_size != 0 && coder->temp_size - coder->footer_flags.backward_size >= seek_amount) { // Make temp_pos and temp_size point to the *end* of // Stream Header so that SEQ_HEADER_DECODE will find // the start of Stream Header from coder->temp[ // coder->temp_size - LZMA_STREAM_HEADER_SIZE]. coder->temp_pos = coder->temp_size - coder->footer_flags.backward_size - seek_amount + LZMA_STREAM_HEADER_SIZE; coder->temp_size = coder->temp_pos; } else { // Seek so that Stream Header will be at the end of // coder->temp. With typical multi-Stream files we // will usually also get the Stream Footer and Index // of the *previous* Stream in coder->temp and thus // won't need a separate seek for them. 
return_if_error(reverse_seek(coder, in_start, in_pos, in_size)); } } // Fall through case SEQ_HEADER_DECODE: // Copy the Stream Header field into coder->temp. // If Stream Header was already available in coder->temp // in SEQ_INDEX_DECODE, then this does nothing. if (fill_temp(coder, in, in_pos, in_size)) return LZMA_OK; // Make all these point to the beginning of Stream Header. coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE; coder->temp_size -= LZMA_STREAM_HEADER_SIZE; coder->temp_pos = coder->temp_size; // Decode the Stream Header. return_if_error(hide_format_error(lzma_stream_header_decode( &coder->header_flags, coder->temp + coder->temp_size))); coder->sequence = SEQ_HEADER_COMPARE; // Fall through case SEQ_HEADER_COMPARE: // Compare Stream Header against Stream Footer. They must // match. return_if_error(lzma_stream_flags_compare( &coder->header_flags, &coder->footer_flags)); // Store the decoded Stream Flags into the Index. Use the // Footer Flags because it contains Backward Size, although // it shouldn't matter in practice. if (lzma_index_stream_flags(coder->this_index, &coder->footer_flags) != LZMA_OK) return LZMA_PROG_ERROR; // Store also the size of the Stream Padding field. It is // needed to calculate the offsets of the Streams correctly. if (lzma_index_stream_padding(coder->this_index, coder->stream_padding) != LZMA_OK) return LZMA_PROG_ERROR; // Reset it so that it's ready for the next Stream. coder->stream_padding = 0; // Append the earlier decoded Indexes after this_index. if (coder->combined_index != NULL) return_if_error(lzma_index_cat(coder->this_index, coder->combined_index, allocator)); coder->combined_index = coder->this_index; coder->this_index = NULL; // If the whole file was decoded, tell the caller that we // are finished. if (coder->file_target_pos == 0) { // The combined index must indicate the same file // size as was told to us at initialization. 
assert(lzma_index_file_size(coder->combined_index) == coder->file_size); // Make the combined index available to // the application. *coder->dest_index = coder->combined_index; coder->combined_index = NULL; // Mark the input buffer as used since we may have // done internal seeking and thus don't know how // many input bytes were actually used. This way // lzma_stream.total_in gets a slightly better // estimate of the amount of input used. *in_pos = in_size; return LZMA_STREAM_END; } // We didn't hit the beginning of the file yet, so continue // reading backwards in the file. If we have unprocessed // data in coder->temp, use it before requesting more data // from the application. // // coder->file_target_pos, coder->temp_size, and // coder->temp_pos all point to the beginning of Stream Header // and thus the end of the previous Stream in the file. coder->sequence = coder->temp_size > 0 ? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK; break; default: assert(0); return LZMA_PROG_ERROR; } } static lzma_ret file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit) { lzma_file_info_coder *coder = coder_ptr; // The memory usage calculation comes from three things: // // (1) The Indexes that have already been decoded and processed into // coder->combined_index. // // (2) The latest Index in coder->this_index that has been decoded but // not yet put into coder->combined_index. // // (3) The latest Index that we have started decoding but haven't // finished and thus isn't available in coder->this_index yet. // Memory usage and limit information needs to be communicated // from/to coder->index_decoder. // // Care has to be taken to not do both (2) and (3) when calculating // the memory usage. uint64_t combined_index_memusage = 0; uint64_t this_index_memusage = 0; // (1) If we have already successfully decoded one or more Indexes, // get their memory usage. 
if (coder->combined_index != NULL) combined_index_memusage = lzma_index_memused( coder->combined_index); // Choose between (2), (3), or neither. if (coder->this_index != NULL) { // (2) The latest Index is available. Use its memory usage. this_index_memusage = lzma_index_memused(coder->this_index); } else if (coder->sequence == SEQ_INDEX_DECODE) { // (3) The Index decoder is activate and hasn't yet stored // the new index in coder->this_index. Get the memory usage // information from the Index decoder. // // NOTE: If the Index decoder doesn't yet know how much memory // it will eventually need, it will return a tiny value here. uint64_t dummy; if (coder->index_decoder.memconfig(coder->index_decoder.coder, &this_index_memusage, &dummy, 0) != LZMA_OK) { assert(0); return LZMA_PROG_ERROR; } } // Now we know the total memory usage/requirement. If we had neither // old Indexes nor a new Index, this will be zero which isn't // acceptable as lzma_memusage() has to return non-zero on success // and even with an empty .xz file we will end up with a lzma_index // that takes some memory. *memusage = combined_index_memusage + this_index_memusage; if (*memusage == 0) *memusage = lzma_index_memusage(1, 0); *old_memlimit = coder->memlimit; // If requested, set a new memory usage limit. if (new_memlimit != 0) { if (new_memlimit < *memusage) return LZMA_MEMLIMIT_ERROR; // In the condition (3) we need to tell the Index decoder // its new memory usage limit. 
if (coder->this_index == NULL && coder->sequence == SEQ_INDEX_DECODE) { const uint64_t idec_new_memlimit = new_memlimit - combined_index_memusage; assert(this_index_memusage > 0); assert(idec_new_memlimit > 0); uint64_t dummy1; uint64_t dummy2; if (coder->index_decoder.memconfig( coder->index_decoder.coder, &dummy1, &dummy2, idec_new_memlimit) != LZMA_OK) { assert(0); return LZMA_PROG_ERROR; } } coder->memlimit = new_memlimit; } return LZMA_OK; } static void file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator) { lzma_file_info_coder *coder = coder_ptr; lzma_next_end(&coder->index_decoder, allocator); lzma_index_end(coder->this_index, allocator); lzma_index_end(coder->combined_index, allocator); lzma_free(coder, allocator); return; } static lzma_ret lzma_file_info_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, uint64_t *seek_pos, lzma_index **dest_index, uint64_t memlimit, uint64_t file_size) { lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator); if (dest_index == NULL) return LZMA_PROG_ERROR; lzma_file_info_coder *coder = next->coder; if (coder == NULL) { coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; next->coder = coder; next->code = &file_info_decode; next->end = &file_info_decoder_end; next->memconfig = &file_info_decoder_memconfig; coder->index_decoder = LZMA_NEXT_CODER_INIT; coder->this_index = NULL; coder->combined_index = NULL; } coder->sequence = SEQ_MAGIC_BYTES; coder->file_cur_pos = 0; coder->file_target_pos = 0; coder->file_size = file_size; lzma_index_end(coder->this_index, allocator); coder->this_index = NULL; lzma_index_end(coder->combined_index, allocator); coder->combined_index = NULL; coder->stream_padding = 0; coder->dest_index = dest_index; coder->external_seek_pos = seek_pos; // If memlimit is 0, make it 1 to ensure that lzma_memlimit_get() // won't return 0 (which would indicate an error). 
coder->memlimit = my_max(1, memlimit); // Prepare these for reading the first Stream Header into coder->temp. coder->temp_pos = 0; coder->temp_size = LZMA_STREAM_HEADER_SIZE; return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index, uint64_t memlimit, uint64_t file_size) { lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos, dest_index, memlimit, file_size); // We allow LZMA_FINISH in addition to LZMA_RUN for convenience. // lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED // combination in a sane way. Applications still need to be careful // if they use LZMA_FINISH so that they remember to reset it back // to LZMA_RUN after seeking if needed. strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } diff --git a/contrib/xz/src/liblzma/common/lzip_decoder.c b/contrib/xz/src/liblzma/common/lzip_decoder.c index 58c086740ad7..88cc7ffd236b 100644 --- a/contrib/xz/src/liblzma/common/lzip_decoder.c +++ b/contrib/xz/src/liblzma/common/lzip_decoder.c @@ -1,418 +1,418 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file lzip_decoder.c /// \brief Decodes .lz (lzip) files // // Author: Michał Górny // Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "lzip_decoder.h" #include "lzma_decoder.h" #include "check.h" // .lz format version 0 lacks the 64-bit Member size field in the footer. #define LZIP_V0_FOOTER_SIZE 12 #define LZIP_V1_FOOTER_SIZE 20 #define LZIP_FOOTER_SIZE_MAX LZIP_V1_FOOTER_SIZE // lc/lp/pb are hardcoded in the .lz format. 
#define LZIP_LC 3 #define LZIP_LP 0 #define LZIP_PB 2 typedef struct { enum { SEQ_ID_STRING, SEQ_VERSION, SEQ_DICT_SIZE, SEQ_CODER_INIT, SEQ_LZMA_STREAM, SEQ_MEMBER_FOOTER, } sequence; /// .lz member format version uint32_t version; /// CRC32 of the uncompressed data in the .lz member uint32_t crc32; /// Uncompressed size of the .lz member uint64_t uncompressed_size; /// Compressed size of the .lz member uint64_t member_size; /// Memory usage limit uint64_t memlimit; /// Amount of memory actually needed uint64_t memusage; /// If true, LZMA_GET_CHECK is returned after decoding the header /// fields. As all files use CRC32 this is redundant but it's /// implemented anyway since the initialization functions supports /// all other flags in addition to LZMA_TELL_ANY_CHECK. bool tell_any_check; /// If true, we won't calculate or verify the CRC32 of /// the uncompressed data. bool ignore_check; /// If true, we will decode concatenated .lz members and stop if /// non-.lz data is seen after at least one member has been /// successfully decoded. bool concatenated; /// When decoding concatenated .lz members, this is true as long as /// we are decoding the first .lz member. This is needed to avoid /// incorrect LZMA_FORMAT_ERROR in case there is non-.lz data at /// the end of the file. bool first_member; /// Reading position in the header and footer fields size_t pos; /// Buffer to hold the .lz footer fields uint8_t buffer[LZIP_FOOTER_SIZE_MAX]; /// Options decoded from the .lz header that needed to initialize /// the LZMA1 decoder. 
lzma_options_lzma options; /// LZMA1 decoder lzma_next_coder lzma_decoder; } lzma_lzip_coder; static lzma_ret lzip_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { lzma_lzip_coder *coder = coder_ptr; while (true) switch (coder->sequence) { case SEQ_ID_STRING: { // The "ID string" or magic bytes are "LZIP" in US-ASCII. const uint8_t lzip_id_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; while (coder->pos < sizeof(lzip_id_string)) { if (*in_pos >= in_size) { // If we are on the 2nd+ concatenated member // and the input ends before we can read // the magic bytes, we discard the bytes that // were already read (up to 3) and finish. // See the reasoning below. return !coder->first_member && action == LZMA_FINISH ? LZMA_STREAM_END : LZMA_OK; } if (in[*in_pos] != lzip_id_string[coder->pos]) { // The .lz format allows putting non-.lz data // at the end of the file. If we have seen // at least one valid .lz member already, // then we won't consume the byte at *in_pos // and will return LZMA_STREAM_END. This way // apps can easily locate and read the non-.lz // data after the .lz member(s). // // NOTE: If the first 1-3 bytes of the non-.lz // data match the .lz ID string then the first // 1-3 bytes of the junk will get ignored by // us. If apps want to properly locate the // trailing data they must ensure that the // first byte of their custom data isn't the // same as the first byte of .lz ID string. // With the liblzma API we cannot rewind the // input position across calls to lzma_code(). return !coder->first_member ? 
LZMA_STREAM_END : LZMA_FORMAT_ERROR; } ++*in_pos; ++coder->pos; } coder->pos = 0; coder->crc32 = 0; coder->uncompressed_size = 0; coder->member_size = sizeof(lzip_id_string); coder->sequence = SEQ_VERSION; } // Fall through case SEQ_VERSION: if (*in_pos >= in_size) return LZMA_OK; coder->version = in[(*in_pos)++]; // We support version 0 and unextended version 1. if (coder->version > 1) return LZMA_OPTIONS_ERROR; ++coder->member_size; coder->sequence = SEQ_DICT_SIZE; // .lz versions 0 and 1 use CRC32 as the integrity check // so if the application wanted to know that // (LZMA_TELL_ANY_CHECK) we can tell it now. if (coder->tell_any_check) return LZMA_GET_CHECK; // Fall through case SEQ_DICT_SIZE: { if (*in_pos >= in_size) return LZMA_OK; const uint32_t ds = in[(*in_pos)++]; ++coder->member_size; // The five lowest bits are for the base-2 logarithm of // the dictionary size and the highest three bits are // the fractional part (0/16 to 7/16) that will be - // substracted to get the final value. + // subtracted to get the final value. // // For example, with 0xB5: // b2log = 21 // fracnum = 5 // dict_size = 2^21 - 2^21 * 5 / 16 = 1408 KiB const uint32_t b2log = ds & 0x1F; const uint32_t fracnum = ds >> 5; // The format versions 0 and 1 allow dictionary size in the // range [4 KiB, 512 MiB]. if (b2log < 12 || b2log > 29 || (b2log == 12 && fracnum > 0)) return LZMA_DATA_ERROR; // 2^[b2log] - 2^[b2log] * [fracnum] / 16 // = 2^[b2log] - [fracnum] * 2^([b2log] - 4) coder->options.dict_size = (UINT32_C(1) << b2log) - (fracnum << (b2log - 4)); assert(coder->options.dict_size >= 4096); assert(coder->options.dict_size <= (UINT32_C(512) << 20)); coder->options.preset_dict = NULL; coder->options.lc = LZIP_LC; coder->options.lp = LZIP_LP; coder->options.pb = LZIP_PB; // Calculate the memory usage. 
coder->memusage = lzma_lzma_decoder_memusage(&coder->options) + LZMA_MEMUSAGE_BASE; // Initialization is a separate step because if we return // LZMA_MEMLIMIT_ERROR we need to be able to restart after // the memlimit has been increased. coder->sequence = SEQ_CODER_INIT; } // Fall through case SEQ_CODER_INIT: { if (coder->memusage > coder->memlimit) return LZMA_MEMLIMIT_ERROR; const lzma_filter_info filters[2] = { { .id = LZMA_FILTER_LZMA1, .init = &lzma_lzma_decoder_init, .options = &coder->options, }, { .init = NULL, } }; return_if_error(lzma_next_filter_init(&coder->lzma_decoder, allocator, filters)); coder->crc32 = 0; coder->sequence = SEQ_LZMA_STREAM; } // Fall through case SEQ_LZMA_STREAM: { const size_t in_start = *in_pos; const size_t out_start = *out_pos; const lzma_ret ret = coder->lzma_decoder.code( coder->lzma_decoder.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); const size_t out_used = *out_pos - out_start; coder->member_size += *in_pos - in_start; coder->uncompressed_size += out_used; // Don't update the CRC32 if the integrity check will be // ignored or if there was no new output. The latter is // important in case out == NULL to avoid null pointer + 0 // which is undefined behavior. if (!coder->ignore_check && out_used > 0) coder->crc32 = lzma_crc32(out + out_start, out_used, coder->crc32); if (ret != LZMA_STREAM_END) return ret; coder->sequence = SEQ_MEMBER_FOOTER; } // Fall through case SEQ_MEMBER_FOOTER: { // The footer of .lz version 0 lacks the Member size field. // This is the only difference between version 0 and // unextended version 1 formats. const size_t footer_size = coder->version == 0 ? LZIP_V0_FOOTER_SIZE : LZIP_V1_FOOTER_SIZE; // Copy the CRC32, Data size, and Member size fields to // the internal buffer. lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, footer_size); // Return if we didn't get the whole footer yet. 
if (coder->pos < footer_size) return LZMA_OK; coder->pos = 0; coder->member_size += footer_size; // Check that the footer fields match the observed data. if (!coder->ignore_check && coder->crc32 != read32le(&coder->buffer[0])) return LZMA_DATA_ERROR; if (coder->uncompressed_size != read64le(&coder->buffer[4])) return LZMA_DATA_ERROR; if (coder->version > 0) { // .lz version 0 has no Member size field. if (coder->member_size != read64le(&coder->buffer[12])) return LZMA_DATA_ERROR; } // Decoding is finished if we weren't requested to decode // more than one .lz member. if (!coder->concatenated) return LZMA_STREAM_END; coder->first_member = false; coder->sequence = SEQ_ID_STRING; break; } default: assert(0); return LZMA_PROG_ERROR; } // Never reached } static void lzip_decoder_end(void *coder_ptr, const lzma_allocator *allocator) { lzma_lzip_coder *coder = coder_ptr; lzma_next_end(&coder->lzma_decoder, allocator); lzma_free(coder, allocator); return; } static lzma_check lzip_decoder_get_check(const void *coder_ptr lzma_attribute((__unused__))) { return LZMA_CHECK_CRC32; } static lzma_ret lzip_decoder_memconfig(void *coder_ptr, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit) { lzma_lzip_coder *coder = coder_ptr; *memusage = coder->memusage; *old_memlimit = coder->memlimit; if (new_memlimit != 0) { if (new_memlimit < coder->memusage) return LZMA_MEMLIMIT_ERROR; coder->memlimit = new_memlimit; } return LZMA_OK; } extern lzma_ret lzma_lzip_decoder_init( lzma_next_coder *next, const lzma_allocator *allocator, uint64_t memlimit, uint32_t flags) { lzma_next_coder_init(&lzma_lzip_decoder_init, next, allocator); if (flags & ~LZMA_SUPPORTED_FLAGS) return LZMA_OPTIONS_ERROR; lzma_lzip_coder *coder = next->coder; if (coder == NULL) { coder = lzma_alloc(sizeof(lzma_lzip_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; next->coder = coder; next->code = &lzip_decode; next->end = &lzip_decoder_end; next->get_check = &lzip_decoder_get_check; 
next->memconfig = &lzip_decoder_memconfig; coder->lzma_decoder = LZMA_NEXT_CODER_INIT; } coder->sequence = SEQ_ID_STRING; coder->memlimit = my_max(1, memlimit); coder->memusage = LZMA_MEMUSAGE_BASE; coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0; coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0; coder->concatenated = (flags & LZMA_CONCATENATED) != 0; coder->first_member = true; coder->pos = 0; return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_lzip_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags) { lzma_next_strm_init(lzma_lzip_decoder_init, strm, memlimit, flags); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } diff --git a/contrib/xz/src/liblzma/common/memcmplen.h b/contrib/xz/src/liblzma/common/memcmplen.h index db3fff60ac93..3c12422beaaf 100644 --- a/contrib/xz/src/liblzma/common/memcmplen.h +++ b/contrib/xz/src/liblzma/common/memcmplen.h @@ -1,171 +1,173 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file memcmplen.h /// \brief Optimized comparison of two buffers // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_MEMCMPLEN_H #define LZMA_MEMCMPLEN_H #include "common.h" #ifdef HAVE_IMMINTRIN_H # include #endif // Only include if it is needed. The header is only needed // on Windows when using an MSVC compatible compiler. The Intel compiler // can use the intrinsics without the header file. #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ - && (defined(_MSC_VER) \ + && defined(_MSC_VER) \ && defined(_M_X64) \ - && !defined(__INTEL_COMPILER)) + && !defined(__INTEL_COMPILER) # include #endif /// Find out how many equal bytes the two buffers have. 
/// /// \param buf1 First buffer /// \param buf2 Second buffer /// \param len How many bytes have already been compared and will /// be assumed to match /// \param limit How many bytes to compare at most, including the /// already-compared bytes. This must be significantly /// smaller than UINT32_MAX to avoid integer overflows. /// Up to LZMA_MEMCMPLEN_EXTRA bytes may be read past /// the specified limit from both buf1 and buf2. /// /// \return Number of equal bytes in the buffers is returned. /// This is always at least len and at most limit. /// /// \note LZMA_MEMCMPLEN_EXTRA defines how many extra bytes may be read. /// It's rounded up to 2^n. This extra amount needs to be /// allocated in the buffers being used. It needs to be /// initialized too to keep Valgrind quiet. static inline uint32_t lzma_attribute((__always_inline__)) lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2, uint32_t len, uint32_t limit) { assert(len <= limit); assert(limit <= UINT32_MAX / 2); #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ && ((TUKLIB_GNUC_REQ(3, 4) && defined(__x86_64__)) \ || (defined(__INTEL_COMPILER) && defined(__x86_64__)) \ || (defined(__INTEL_COMPILER) && defined(_M_X64)) \ || (defined(_MSC_VER) && defined(_M_X64))) // I keep this x86-64 only for now since that's where I know this // to be a good method. This may be fine on other 64-bit CPUs too. // On big endian one should use xor instead of subtraction and switch // to __builtin_clzll(). 
#define LZMA_MEMCMPLEN_EXTRA 8 while (len < limit) { const uint64_t x = read64ne(buf1 + len) - read64ne(buf2 + len); if (x != 0) { -# if defined(_M_X64) // MSVC or Intel C compiler on Windows + // MSVC or Intel C compiler on Windows +# if (defined(_MSC_VER) || defined(__INTEL_COMPILER)) && defined(_M_X64) unsigned long tmp; _BitScanForward64(&tmp, x); len += (uint32_t)tmp >> 3; -# else // GCC, clang, or Intel C compiler + // GCC, Clang, or Intel C compiler +# else len += (uint32_t)__builtin_ctzll(x) >> 3; # endif return my_min(len, limit); } len += 8; } return limit; #elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ && defined(HAVE__MM_MOVEMASK_EPI8) \ && (defined(__SSE2__) \ || (defined(_MSC_VER) && defined(_M_IX86_FP) \ && _M_IX86_FP >= 2)) // NOTE: This will use 128-bit unaligned access which // TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit, // but it's convenient here since this is x86-only. // // SSE2 version for 32-bit and 64-bit x86. On x86-64 the above // version is sometimes significantly faster and sometimes // slightly slower than this SSE2 version, so this SSE2 // version isn't used on x86-64. 
# define LZMA_MEMCMPLEN_EXTRA 16 while (len < limit) { const uint32_t x = 0xFFFF ^ (uint32_t)_mm_movemask_epi8( _mm_cmpeq_epi8( _mm_loadu_si128((const __m128i *)(buf1 + len)), _mm_loadu_si128((const __m128i *)(buf2 + len)))); if (x != 0) { len += ctz32(x); return my_min(len, limit); } len += 16; } return limit; #elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && !defined(WORDS_BIGENDIAN) // Generic 32-bit little endian method # define LZMA_MEMCMPLEN_EXTRA 4 while (len < limit) { uint32_t x = read32ne(buf1 + len) - read32ne(buf2 + len); if (x != 0) { if ((x & 0xFFFF) == 0) { len += 2; x >>= 16; } if ((x & 0xFF) == 0) ++len; return my_min(len, limit); } len += 4; } return limit; #elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && defined(WORDS_BIGENDIAN) // Generic 32-bit big endian method # define LZMA_MEMCMPLEN_EXTRA 4 while (len < limit) { uint32_t x = read32ne(buf1 + len) ^ read32ne(buf2 + len); if (x != 0) { if ((x & 0xFFFF0000) == 0) { len += 2; x <<= 16; } if ((x & 0xFF000000) == 0) ++len; return my_min(len, limit); } len += 4; } return limit; #else // Simple portable version that doesn't use unaligned access. # define LZMA_MEMCMPLEN_EXTRA 0 while (len < limit && buf1[len] == buf2[len]) ++len; return len; #endif } #endif diff --git a/contrib/xz/src/liblzma/common/stream_decoder_mt.c b/contrib/xz/src/liblzma/common/stream_decoder_mt.c index b8ba4d390d80..76212b46da32 100644 --- a/contrib/xz/src/liblzma/common/stream_decoder_mt.c +++ b/contrib/xz/src/liblzma/common/stream_decoder_mt.c @@ -1,2018 +1,2018 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file stream_decoder_mt.c /// \brief Multithreaded .xz Stream decoder // // Authors: Sebastian Andrzej Siewior // Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. 
// /////////////////////////////////////////////////////////////////////////////// #include "common.h" #include "block_decoder.h" #include "stream_decoder.h" #include "index.h" #include "outqueue.h" typedef enum { /// Waiting for work. /// Main thread may change this to THR_RUN or THR_EXIT. THR_IDLE, /// Decoding is in progress. /// Main thread may change this to THR_STOP or THR_EXIT. /// The worker thread may change this to THR_IDLE. THR_RUN, /// The main thread wants the thread to stop whatever it was doing /// but not exit. Main thread may change this to THR_EXIT. /// The worker thread may change this to THR_IDLE. THR_STOP, /// The main thread wants the thread to exit. THR_EXIT, } worker_state; typedef enum { /// Partial updates (storing of worker thread progress /// to lzma_outbuf) are disabled. PARTIAL_DISABLED, /// Main thread requests partial updates to be enabled but /// no partial update has been done by the worker thread yet. /// /// Changing from PARTIAL_DISABLED to PARTIAL_START requires /// use of the worker-thread mutex. Other transitions don't /// need a mutex. PARTIAL_START, /// Partial updates are enabled and the worker thread has done /// at least one partial update. PARTIAL_ENABLED, } partial_update_mode; struct worker_thread { /// Worker state is protected with our mutex. worker_state state; /// Input buffer that will contain the whole Block except Block Header. uint8_t *in; /// Amount of memory allocated for "in" size_t in_size; /// Number of bytes written to "in" by the main thread size_t in_filled; /// Number of bytes consumed from "in" by the worker thread. size_t in_pos; /// Amount of uncompressed data that has been decoded. This local /// copy is needed because updating outbuf->pos requires locking /// the main mutex (coder->mutex). size_t out_pos; /// Pointer to the main structure is needed to (1) lock the main /// mutex (coder->mutex) when updating outbuf->pos and (2) when /// putting this thread back to the stack of free threads. 
struct lzma_stream_coder *coder; /// The allocator is set by the main thread. Since a copy of the /// pointer is kept here, the application must not change the /// allocator before calling lzma_end(). const lzma_allocator *allocator; /// Output queue buffer to which the uncompressed data is written. lzma_outbuf *outbuf; /// Amount of compressed data that has already been decompressed. /// This is updated from in_pos when our mutex is locked. /// This is size_t, not uint64_t, because per-thread progress /// is limited to sizes of allocated buffers. size_t progress_in; /// Like progress_in but for uncompressed data. size_t progress_out; /// Updating outbuf->pos requires locking the main mutex /// (coder->mutex). Since the main thread will only read output /// from the oldest outbuf in the queue, only the worker thread /// that is associated with the oldest outbuf needs to update its /// outbuf->pos. This avoids useless mutex contention that would /// happen if all worker threads were frequently locking the main /// mutex to update their outbuf->pos. /// /// Only when partial_update is something else than PARTIAL_DISABLED, /// this worker thread will update outbuf->pos after each call to /// the Block decoder. partial_update_mode partial_update; /// Block decoder lzma_next_coder block_decoder; /// Thread-specific Block options are needed because the Block /// decoder modifies the struct given to it at initialization. lzma_block block_options; /// Filter chain memory usage uint64_t mem_filters; /// Next structure in the stack of free worker threads. struct worker_thread *next; mythread_mutex mutex; mythread_cond cond; /// The ID of this thread is used to join the thread /// when it's not needed anymore. 
mythread thread_id; }; struct lzma_stream_coder { enum { SEQ_STREAM_HEADER, SEQ_BLOCK_HEADER, SEQ_BLOCK_INIT, SEQ_BLOCK_THR_INIT, SEQ_BLOCK_THR_RUN, SEQ_BLOCK_DIRECT_INIT, SEQ_BLOCK_DIRECT_RUN, SEQ_INDEX_WAIT_OUTPUT, SEQ_INDEX_DECODE, SEQ_STREAM_FOOTER, SEQ_STREAM_PADDING, SEQ_ERROR, } sequence; /// Block decoder lzma_next_coder block_decoder; /// Every Block Header will be decoded into this structure. /// This is also used to initialize a Block decoder when in /// direct mode. In threaded mode, a thread-specific copy will /// be made for decoder initialization because the Block decoder /// will modify the structure given to it. lzma_block block_options; /// Buffer to hold a filter chain for Block Header decoding and /// initialization. These are freed after successful Block decoder /// initialization or at stream_decoder_mt_end(). The thread-specific /// copy of block_options won't hold a pointer to filters[] after /// initialization. lzma_filter filters[LZMA_FILTERS_MAX + 1]; /// Stream Flags from Stream Header lzma_stream_flags stream_flags; /// Index is hashed so that it can be compared to the sizes of Blocks /// with O(1) memory usage. lzma_index_hash *index_hash; /// Maximum wait time if cannot use all the input and cannot /// fill the output buffer. This is in milliseconds. uint32_t timeout; /// Error code from a worker thread. /// /// \note Use mutex. lzma_ret thread_error; /// Error code to return after pending output has been copied out. If /// set in read_output_and_wait(), this is a mirror of thread_error. /// If set in stream_decode_mt() then it's, for example, error that /// occurred when decoding Block Header. lzma_ret pending_error; /// Number of threads that will be created at maximum. uint32_t threads_max; /// Number of thread structures that have been initialized from /// "threads", and thus the number of worker threads actually /// created so far. uint32_t threads_initialized; /// Array of allocated thread-specific structures. 
When no threads /// are in use (direct mode) this is NULL. In threaded mode this /// points to an array of threads_max number of worker_thread structs. struct worker_thread *threads; /// Stack of free threads. When a thread finishes, it puts itself /// back into this stack. This starts as empty because threads /// are created only when actually needed. /// /// \note Use mutex. struct worker_thread *threads_free; /// The most recent worker thread to which the main thread writes /// the new input from the application. struct worker_thread *thr; /// Output buffer queue for decompressed data from the worker threads /// /// \note Use mutex with operations that need it. lzma_outq outq; mythread_mutex mutex; mythread_cond cond; /// Memory usage that will not be exceeded in multi-threaded mode. /// Single-threaded mode can exceed this even by a large amount. uint64_t memlimit_threading; /// Memory usage limit that should never be exceeded. /// LZMA_MEMLIMIT_ERROR will be returned if decoding isn't possible /// even in single-threaded mode without exceeding this limit. uint64_t memlimit_stop; /// Amount of memory in use by the direct mode decoder /// (coder->block_decoder). In threaded mode this is 0. uint64_t mem_direct_mode; /// Amount of memory needed by the running worker threads. /// This doesn't include the memory needed by the output buffer. /// /// \note Use mutex. uint64_t mem_in_use; /// Amount of memory used by the idle (cached) threads. /// /// \note Use mutex. uint64_t mem_cached; /// Amount of memory needed for the filter chain of the next Block. uint64_t mem_next_filters; /// Amount of memory needed for the thread-specific input buffer /// for the next Block. uint64_t mem_next_in; /// Amount of memory actually needed to decode the next Block /// in threaded mode. This is /// mem_next_filters + mem_next_in + memory needed for lzma_outbuf. uint64_t mem_next_block; /// Amount of compressed data in Stream Header + Blocks that have /// already been finished. 
/// /// \note Use mutex. uint64_t progress_in; /// Amount of uncompressed data in Blocks that have already /// been finished. /// /// \note Use mutex. uint64_t progress_out; /// If true, LZMA_NO_CHECK is returned if the Stream has /// no integrity check. bool tell_no_check; /// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has /// an integrity check that isn't supported by this liblzma build. bool tell_unsupported_check; /// If true, LZMA_GET_CHECK is returned after decoding Stream Header. bool tell_any_check; /// If true, we will tell the Block decoder to skip calculating /// and verifying the integrity check. bool ignore_check; /// If true, we will decode concatenated Streams that possibly have /// Stream Padding between or after them. LZMA_STREAM_END is returned /// once the application isn't giving us any new input (LZMA_FINISH), /// and we aren't in the middle of a Stream, and possible /// Stream Padding is a multiple of four bytes. bool concatenated; /// If true, we will return any errors immediately instead of first /// producing all output before the location of the error. bool fail_fast; /// When decoding concatenated Streams, this is true as long as we /// are decoding the first Stream. This is needed to avoid misleading /// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic /// bytes. bool first_stream; /// This is used to track if the previous call to stream_decode_mt() /// had output space (*out_pos < out_size) and managed to fill the /// output buffer (*out_pos == out_size). This may be set to true /// in read_output_and_wait(). This is read and then reset to false /// at the beginning of stream_decode_mt(). /// /// This is needed to support applications that call lzma_code() in /// such a way that more input is provided only when lzma_code() /// didn't fill the output buffer completely. Basically, this makes /// it easier to convert such applications from single-threaded /// decoder to multi-threaded decoder. 
bool out_was_filled; /// Write position in buffer[] and position in Stream Padding size_t pos; /// Buffer to hold Stream Header, Block Header, and Stream Footer. /// Block Header has biggest maximum size. uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; }; /// Enables updating of outbuf->pos. This is a callback function that is /// used with lzma_outq_enable_partial_output(). static void worker_enable_partial_update(void *thr_ptr) { struct worker_thread *thr = thr_ptr; mythread_sync(thr->mutex) { thr->partial_update = PARTIAL_START; mythread_cond_signal(&thr->cond); } } /// Things do to at THR_STOP or when finishing a Block. /// This is called with thr->mutex locked. static void worker_stop(struct worker_thread *thr) { // Update memory usage counters. thr->coder->mem_in_use -= thr->in_size; thr->in_size = 0; // thr->in was freed above. thr->coder->mem_in_use -= thr->mem_filters; thr->coder->mem_cached += thr->mem_filters; // Put this thread to the stack of free threads. thr->next = thr->coder->threads_free; thr->coder->threads_free = thr; mythread_cond_signal(&thr->coder->cond); return; } static MYTHREAD_RET_TYPE worker_decoder(void *thr_ptr) { struct worker_thread *thr = thr_ptr; size_t in_filled; partial_update_mode partial_update; lzma_ret ret; next_loop_lock: mythread_mutex_lock(&thr->mutex); next_loop_unlocked: if (thr->state == THR_IDLE) { mythread_cond_wait(&thr->cond, &thr->mutex); goto next_loop_unlocked; } if (thr->state == THR_EXIT) { mythread_mutex_unlock(&thr->mutex); lzma_free(thr->in, thr->allocator); lzma_next_end(&thr->block_decoder, thr->allocator); mythread_mutex_destroy(&thr->mutex); mythread_cond_destroy(&thr->cond); return MYTHREAD_RET_VALUE; } if (thr->state == THR_STOP) { thr->state = THR_IDLE; mythread_mutex_unlock(&thr->mutex); mythread_sync(thr->coder->mutex) { worker_stop(thr); } goto next_loop_lock; } assert(thr->state == THR_RUN); // Update progress info for get_progress(). 
thr->progress_in = thr->in_pos; thr->progress_out = thr->out_pos; // If we don't have any new input, wait for a signal from the main // thread except if partial output has just been enabled. In that // case we will do one normal run so that the partial output info // gets passed to the main thread. The call to block_decoder.code() // is useless but harmless as it can occur only once per Block. in_filled = thr->in_filled; partial_update = thr->partial_update; if (in_filled == thr->in_pos && partial_update != PARTIAL_START) { mythread_cond_wait(&thr->cond, &thr->mutex); goto next_loop_unlocked; } mythread_mutex_unlock(&thr->mutex); // Pass the input in small chunks to the Block decoder. // This way we react reasonably fast if we are told to stop/exit, // and (when partial update is enabled) we tell about our progress // to the main thread frequently enough. const size_t chunk_size = 16384; if ((in_filled - thr->in_pos) > chunk_size) in_filled = thr->in_pos + chunk_size; ret = thr->block_decoder.code( thr->block_decoder.coder, thr->allocator, thr->in, &thr->in_pos, in_filled, thr->outbuf->buf, &thr->out_pos, thr->outbuf->allocated, LZMA_RUN); if (ret == LZMA_OK) { if (partial_update != PARTIAL_DISABLED) { // The main thread uses thr->mutex to change from // PARTIAL_DISABLED to PARTIAL_START. The main thread // doesn't care about this variable after that so we // can safely change it here to PARTIAL_ENABLED // without a mutex. thr->partial_update = PARTIAL_ENABLED; // The main thread is reading decompressed data // from thr->outbuf. Tell the main thread about // our progress. // // NOTE: It's possible that we consumed input without // producing any new output so it's possible that // only in_pos has changed. In case of PARTIAL_START // it is possible that neither in_pos nor out_pos has // changed. 
mythread_sync(thr->coder->mutex) { thr->outbuf->pos = thr->out_pos; thr->outbuf->decoder_in_pos = thr->in_pos; mythread_cond_signal(&thr->coder->cond); } } goto next_loop_lock; } // Either we finished successfully (LZMA_STREAM_END) or an error // occurred. Both cases are handled almost identically. The error // case requires updating thr->coder->thread_error. // // The sizes are in the Block Header and the Block decoder // checks that they match, thus we know these: assert(ret != LZMA_STREAM_END || thr->in_pos == thr->in_size); assert(ret != LZMA_STREAM_END || thr->out_pos == thr->block_options.uncompressed_size); // Free the input buffer. Don't update in_size as we need // it later to update thr->coder->mem_in_use. lzma_free(thr->in, thr->allocator); thr->in = NULL; mythread_sync(thr->mutex) { if (thr->state != THR_EXIT) thr->state = THR_IDLE; } mythread_sync(thr->coder->mutex) { // Move our progress info to the main thread. thr->coder->progress_in += thr->in_pos; thr->coder->progress_out += thr->out_pos; thr->progress_in = 0; thr->progress_out = 0; // Mark the outbuf as finished. thr->outbuf->pos = thr->out_pos; thr->outbuf->decoder_in_pos = thr->in_pos; thr->outbuf->finished = true; thr->outbuf->finish_ret = ret; thr->outbuf = NULL; // If an error occurred, tell it to the main thread. if (ret != LZMA_STREAM_END && thr->coder->thread_error == LZMA_OK) thr->coder->thread_error = ret; worker_stop(thr); } goto next_loop_lock; } /// Tells the worker threads to exit and waits for them to terminate. 
static void threads_end(struct lzma_stream_coder *coder, const lzma_allocator *allocator) { for (uint32_t i = 0; i < coder->threads_initialized; ++i) { mythread_sync(coder->threads[i].mutex) { coder->threads[i].state = THR_EXIT; mythread_cond_signal(&coder->threads[i].cond); } } for (uint32_t i = 0; i < coder->threads_initialized; ++i) mythread_join(coder->threads[i].thread_id); lzma_free(coder->threads, allocator); coder->threads_initialized = 0; coder->threads = NULL; coder->threads_free = NULL; // The threads don't update these when they exit. Do it here. coder->mem_in_use = 0; coder->mem_cached = 0; return; } static void threads_stop(struct lzma_stream_coder *coder) { for (uint32_t i = 0; i < coder->threads_initialized; ++i) { mythread_sync(coder->threads[i].mutex) { // The state must be changed conditionally because // THR_IDLE -> THR_STOP is not a valid state change. if (coder->threads[i].state != THR_IDLE) { coder->threads[i].state = THR_STOP; mythread_cond_signal(&coder->threads[i].cond); } } } return; } /// Initialize a new worker_thread structure and create a new thread. static lzma_ret initialize_new_thread(struct lzma_stream_coder *coder, const lzma_allocator *allocator) { // Allocate the coder->threads array if needed. It's done here instead // of when initializing the decoder because we don't need this if we // use the direct mode (we may even free coder->threads in the middle // of the file if we switch from threaded to direct mode). if (coder->threads == NULL) { coder->threads = lzma_alloc( coder->threads_max * sizeof(struct worker_thread), allocator); if (coder->threads == NULL) return LZMA_MEM_ERROR; } // Pick a free structure. 
assert(coder->threads_initialized < coder->threads_max); struct worker_thread *thr = &coder->threads[coder->threads_initialized]; if (mythread_mutex_init(&thr->mutex)) goto error_mutex; if (mythread_cond_init(&thr->cond)) goto error_cond; thr->state = THR_IDLE; thr->in = NULL; thr->in_size = 0; thr->allocator = allocator; thr->coder = coder; thr->outbuf = NULL; thr->block_decoder = LZMA_NEXT_CODER_INIT; thr->mem_filters = 0; if (mythread_create(&thr->thread_id, worker_decoder, thr)) goto error_thread; ++coder->threads_initialized; coder->thr = thr; return LZMA_OK; error_thread: mythread_cond_destroy(&thr->cond); error_cond: mythread_mutex_destroy(&thr->mutex); error_mutex: return LZMA_MEM_ERROR; } static lzma_ret get_thread(struct lzma_stream_coder *coder, const lzma_allocator *allocator) { // If there is a free structure on the stack, use it. mythread_sync(coder->mutex) { if (coder->threads_free != NULL) { coder->thr = coder->threads_free; coder->threads_free = coder->threads_free->next; - // The thread is no longer in the cache so substract + // The thread is no longer in the cache so subtract // it from the cached memory usage. Don't add it // to mem_in_use though; the caller will handle it // since it knows how much memory it will actually // use (the filter chain might change). coder->mem_cached -= coder->thr->mem_filters; } } if (coder->thr == NULL) { assert(coder->threads_initialized < coder->threads_max); // Initialize a new thread. 
return_if_error(initialize_new_thread(coder, allocator)); } coder->thr->in_filled = 0; coder->thr->in_pos = 0; coder->thr->out_pos = 0; coder->thr->progress_in = 0; coder->thr->progress_out = 0; coder->thr->partial_update = PARTIAL_DISABLED; return LZMA_OK; } static lzma_ret read_output_and_wait(struct lzma_stream_coder *coder, const lzma_allocator *allocator, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, bool *input_is_possible, bool waiting_allowed, mythread_condtime *wait_abs, bool *has_blocked) { lzma_ret ret = LZMA_OK; mythread_sync(coder->mutex) { do { // Get as much output from the queue as is possible // without blocking. const size_t out_start = *out_pos; do { ret = lzma_outq_read(&coder->outq, allocator, out, out_pos, out_size, NULL, NULL); // If a Block was finished, tell the worker // thread of the next Block (if it is still // running) to start telling the main thread // when new output is available. if (ret == LZMA_STREAM_END) lzma_outq_enable_partial_output( &coder->outq, &worker_enable_partial_update); // Loop until a Block wasn't finished. // It's important to loop around even if // *out_pos == out_size because there could // be an empty Block that will return // LZMA_STREAM_END without needing any // output space. } while (ret == LZMA_STREAM_END); // Check if lzma_outq_read reported an error from // the Block decoder. if (ret != LZMA_OK) break; // If the output buffer is now full but it wasn't full // when this function was called, set out_was_filled. // This way the next call to stream_decode_mt() knows // that some output was produced and no output space // remained in the previous call to stream_decode_mt(). if (*out_pos == out_size && *out_pos != out_start) coder->out_was_filled = true; // Check if any thread has indicated an error. if (coder->thread_error != LZMA_OK) { // If LZMA_FAIL_FAST was used, report errors // from worker threads immediately. 
if (coder->fail_fast) { ret = coder->thread_error; break; } // Otherwise set pending_error. The value we // set here will not actually get used other // than working as a flag that an error has // occurred. This is because in SEQ_ERROR // all output before the error will be read // first by calling this function, and once we // reach the location of the (first) error the // error code from the above lzma_outq_read() // will be returned to the application. // // Use LZMA_PROG_ERROR since the value should // never leak to the application. It's // possible that pending_error has already // been set but that doesn't matter: if we get // here, pending_error only works as a flag. coder->pending_error = LZMA_PROG_ERROR; } // Check if decoding of the next Block can be started. // The memusage of the active threads must be low // enough, there must be a free buffer slot in the // output queue, and there must be a free thread // (that can be either created or an existing one // reused). // // NOTE: This is checked after reading the output // above because reading the output can free a slot in // the output queue and also reduce active memusage. // // NOTE: If output queue is empty, then input will // always be possible. if (input_is_possible != NULL && coder->memlimit_threading - coder->mem_in_use - coder->outq.mem_in_use >= coder->mem_next_block && lzma_outq_has_buf(&coder->outq) && (coder->threads_initialized < coder->threads_max || coder->threads_free != NULL)) { *input_is_possible = true; break; } // If the caller doesn't want us to block, return now. if (!waiting_allowed) break; // This check is needed only when input_is_possible // is NULL. We must return if we aren't waiting for // input to become possible and there is no more // output coming from the queue. 
if (lzma_outq_is_empty(&coder->outq)) { assert(input_is_possible == NULL); break; } // If there is more data available from the queue, // our out buffer must be full and we need to return // so that the application can provide more output // space. // // NOTE: In general lzma_outq_is_readable() can return // true also when there are no more bytes available. // This can happen when a Block has finished without // providing any new output. We know that this is not // the case because in the beginning of this loop we // tried to read as much as possible even when we had // no output space left and the mutex has been locked // all the time (so worker threads cannot have changed // anything). Thus there must be actual pending output // in the queue. if (lzma_outq_is_readable(&coder->outq)) { assert(*out_pos == out_size); break; } // If the application stops providing more input // in the middle of a Block, there will eventually // be one worker thread left that is stuck waiting for // more input (that might never arrive) and a matching // outbuf which the worker thread cannot finish due // to lack of input. We must detect this situation, // otherwise we would end up waiting indefinitely // (if no timeout is in use) or keep returning // LZMA_TIMED_OUT while making no progress. Thus, the // application would never get LZMA_BUF_ERROR from // lzma_code() which would tell the application that // no more progress is possible. No LZMA_BUF_ERROR // means that, for example, truncated .xz files could // cause an infinite loop. // // A worker thread doing partial updates will // store not only the output position in outbuf->pos // but also the matching input position in // outbuf->decoder_in_pos. Here we check if that // input position matches the amount of input that // the worker thread has been given (in_filled). // If so, we must return and not wait as no more // output will be coming without first getting more // input to the worker thread. 
If the application // keeps calling lzma_code() without providing more // input, it will eventually get LZMA_BUF_ERROR. // // NOTE: We can read partial_update and in_filled // without thr->mutex as only the main thread // modifies these variables. decoder_in_pos requires // coder->mutex which we are already holding. if (coder->thr != NULL && coder->thr->partial_update != PARTIAL_DISABLED) { // There is exactly one outbuf in the queue. assert(coder->thr->outbuf == coder->outq.head); assert(coder->thr->outbuf == coder->outq.tail); if (coder->thr->outbuf->decoder_in_pos == coder->thr->in_filled) break; } // Wait for input or output to become possible. if (coder->timeout != 0) { // See the comment in stream_encoder_mt.c // about why mythread_condtime_set() is used // like this. // // FIXME? // In contrast to the encoder, this calls // _condtime_set while the mutex is locked. if (!*has_blocked) { *has_blocked = true; mythread_condtime_set(wait_abs, &coder->cond, coder->timeout); } if (mythread_cond_timedwait(&coder->cond, &coder->mutex, wait_abs) != 0) { ret = LZMA_TIMED_OUT; break; } } else { mythread_cond_wait(&coder->cond, &coder->mutex); } } while (ret == LZMA_OK); } // If we are returning an error, then the application cannot get // more output from us and thus keeping the threads running is // useless and waste of CPU time. if (ret != LZMA_OK && ret != LZMA_TIMED_OUT) threads_stop(coder); return ret; } static lzma_ret decode_block_header(struct lzma_stream_coder *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size) { if (*in_pos >= in_size) return LZMA_OK; if (coder->pos == 0) { // Detect if it's Index. if (in[*in_pos] == INDEX_INDICATOR) return LZMA_INDEX_DETECTED; // Calculate the size of the Block Header. Note that // Block Header decoder wants to see this byte too // so don't advance *in_pos. 
coder->block_options.header_size = lzma_block_header_size_decode( in[*in_pos]); } // Copy the Block Header to the internal buffer. lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, coder->block_options.header_size); // Return if we didn't get the whole Block Header yet. if (coder->pos < coder->block_options.header_size) return LZMA_OK; coder->pos = 0; // Version 1 is needed to support the .ignore_check option. coder->block_options.version = 1; // Block Header decoder will initialize all members of this array // so we don't need to do it here. coder->block_options.filters = coder->filters; // Decode the Block Header. return_if_error(lzma_block_header_decode(&coder->block_options, allocator, coder->buffer)); // If LZMA_IGNORE_CHECK was used, this flag needs to be set. // It has to be set after lzma_block_header_decode() because // it always resets this to false. coder->block_options.ignore_check = coder->ignore_check; // coder->block_options is ready now. return LZMA_STREAM_END; } /// Get the size of the Compressed Data + Block Padding + Check. static size_t comp_blk_size(const struct lzma_stream_coder *coder) { return vli_ceil4(coder->block_options.compressed_size) + lzma_check_size(coder->stream_flags.check); } /// Returns true if the size (compressed or uncompressed) is such that /// threaded decompression cannot be used. Sizes that are too big compared /// to SIZE_MAX must be rejected to avoid integer overflows and truncations /// when lzma_vli is assigned to a size_t. static bool is_direct_mode_needed(lzma_vli size) { return size == LZMA_VLI_UNKNOWN || size > SIZE_MAX / 3; } static lzma_ret stream_decoder_reset(struct lzma_stream_coder *coder, const lzma_allocator *allocator) { // Initialize the Index hash used to verify the Index. coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator); if (coder->index_hash == NULL) return LZMA_MEM_ERROR; // Reset the rest of the variables. 
coder->sequence = SEQ_STREAM_HEADER; coder->pos = 0; return LZMA_OK; } static lzma_ret stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { struct lzma_stream_coder *coder = coder_ptr; mythread_condtime wait_abs; bool has_blocked = false; // Determine if in SEQ_BLOCK_HEADER and SEQ_BLOCK_THR_RUN we should // tell read_output_and_wait() to wait until it can fill the output // buffer (or a timeout occurs). Two conditions must be met: // // (1) If the caller provided no new input. The reason for this // can be, for example, the end of the file or that there is // a pause in the input stream and more input is available // a little later. In this situation we should wait for output // because otherwise we would end up in a busy-waiting loop where // we make no progress and the application just calls us again // without providing any new input. This would then result in // LZMA_BUF_ERROR even though more output would be available // once the worker threads decode more data. // // (2) Even if (1) is true, we will not wait if the previous call to // this function managed to produce some output and the output // buffer became full. This is for compatibility with applications // that call lzma_code() in such a way that new input is provided // only when the output buffer didn't become full. Without this // trick such applications would have bad performance (bad // parallelization due to decoder not getting input fast enough). // // NOTE: Such loops might require that timeout is disabled (0) // if they assume that output-not-full implies that all input has // been consumed. If and only if timeout is enabled, we may return // when output isn't full *and* not all input has been consumed. 
// // However, if LZMA_FINISH is used, the above is ignored and we always // wait (timeout can still cause us to return) because we know that // we won't get any more input. This matters if the input file is // truncated and we are doing single-shot decoding, that is, // timeout = 0 and LZMA_FINISH is used on the first call to // lzma_code() and the output buffer is known to be big enough // to hold all uncompressed data: // // - If LZMA_FINISH wasn't handled specially, we could return // LZMA_OK before providing all output that is possible with the // truncated input. The rest would be available if lzma_code() was // called again but then it's not single-shot decoding anymore. // // - By handling LZMA_FINISH specially here, the first call will // produce all the output, matching the behavior of the // single-threaded decoder. // // So it's a very specific corner case but also easy to avoid. Note // that this special handling of LZMA_FINISH has no effect for // single-shot decoding when the input file is valid (not truncated); // premature LZMA_OK wouldn't be possible as long as timeout = 0. const bool waiting_allowed = action == LZMA_FINISH || (*in_pos == in_size && !coder->out_was_filled); coder->out_was_filled = false; while (true) switch (coder->sequence) { case SEQ_STREAM_HEADER: { // Copy the Stream Header to the internal buffer. const size_t in_old = *in_pos; lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, LZMA_STREAM_HEADER_SIZE); coder->progress_in += *in_pos - in_old; // Return if we didn't get the whole Stream Header yet. if (coder->pos < LZMA_STREAM_HEADER_SIZE) return LZMA_OK; coder->pos = 0; // Decode the Stream Header. const lzma_ret ret = lzma_stream_header_decode( &coder->stream_flags, coder->buffer); if (ret != LZMA_OK) return ret == LZMA_FORMAT_ERROR && !coder->first_stream ? 
LZMA_DATA_ERROR : ret; // If we are decoding concatenated Streams, and the later // Streams have invalid Header Magic Bytes, we give // LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR. coder->first_stream = false; // Copy the type of the Check so that Block Header and Block // decoders see it. coder->block_options.check = coder->stream_flags.check; // Even if we return LZMA_*_CHECK below, we want // to continue from Block Header decoding. coder->sequence = SEQ_BLOCK_HEADER; // Detect if there's no integrity check or if it is // unsupported if those were requested by the application. if (coder->tell_no_check && coder->stream_flags.check == LZMA_CHECK_NONE) return LZMA_NO_CHECK; if (coder->tell_unsupported_check && !lzma_check_is_supported( coder->stream_flags.check)) return LZMA_UNSUPPORTED_CHECK; if (coder->tell_any_check) return LZMA_GET_CHECK; } // Fall through case SEQ_BLOCK_HEADER: { const size_t in_old = *in_pos; const lzma_ret ret = decode_block_header(coder, allocator, in, in_pos, in_size); coder->progress_in += *in_pos - in_old; if (ret == LZMA_OK) { // We didn't decode the whole Block Header yet. // // Read output from the queue before returning. This // is important because it is possible that the // application doesn't have any new input available // immediately. If we didn't try to copy output from // the output queue here, lzma_code() could end up // returning LZMA_BUF_ERROR even though queued output // is available. // // If the lzma_code() call provided at least one input // byte, only copy as much data from the output queue // as is available immediately. This way the // application will be able to provide more input // without a delay. // // On the other hand, if lzma_code() was called with // an empty input buffer(*), treat it specially: try // to fill the output buffer even if it requires // waiting for the worker threads to provide output // (timeout, if specified, can still cause us to // return). 
// // - This way the application will be able to get all // data that can be decoded from the input provided // so far. // // - We avoid both premature LZMA_BUF_ERROR and // busy-waiting where the application repeatedly // calls lzma_code() which immediately returns // LZMA_OK without providing new data. // // - If the queue becomes empty, we won't wait // anything and will return LZMA_OK immediately // (coder->timeout is completely ignored). // // (*) See the comment at the beginning of this // function how waiting_allowed is determined // and why there is an exception to the rule // of "called with an empty input buffer". assert(*in_pos == in_size); // If LZMA_FINISH was used we know that we won't get // more input, so the file must be truncated if we // get here. If worker threads don't detect any // errors, eventually there will be no more output // while we keep returning LZMA_OK which gets // converted to LZMA_BUF_ERROR in lzma_code(). // // If fail-fast is enabled then we will return // immediately using LZMA_DATA_ERROR instead of // LZMA_OK or LZMA_BUF_ERROR. Rationale for the // error code: // // - Worker threads may have a large amount of // not-yet-decoded input data and we don't // know for sure if all data is valid. Bad // data there would result in LZMA_DATA_ERROR // when fail-fast isn't used. // // - Immediate LZMA_BUF_ERROR would be a bit weird // considering the older liblzma code. lzma_code() // even has an assertion to prevent coders from // returning LZMA_BUF_ERROR directly. // // The downside of this is that with fail-fast apps // cannot always distinguish between corrupt and // truncated files. if (action == LZMA_FINISH && coder->fail_fast) { // We won't produce any more output. Stop // the unfinished worker threads so they // won't waste CPU time. threads_stop(coder); return LZMA_DATA_ERROR; } // read_output_and_wait() will call threads_stop() // if needed so with that we can use return_if_error. 
return_if_error(read_output_and_wait(coder, allocator, out, out_pos, out_size, NULL, waiting_allowed, &wait_abs, &has_blocked)); if (coder->pending_error != LZMA_OK) { coder->sequence = SEQ_ERROR; break; } return LZMA_OK; } if (ret == LZMA_INDEX_DETECTED) { coder->sequence = SEQ_INDEX_WAIT_OUTPUT; break; } // See if an error occurred. if (ret != LZMA_STREAM_END) { // NOTE: Here and in all other places where // pending_error is set, it may overwrite the value // (LZMA_PROG_ERROR) set by read_output_and_wait(). // That function might overwrite value set here too. // These are fine because when read_output_and_wait() // sets pending_error, it actually works as a flag // variable only ("some error has occurred") and the // actual value of pending_error is not used in // SEQ_ERROR. In such cases SEQ_ERROR will eventually // get the correct error code from the return value of // a later read_output_and_wait() call. coder->pending_error = ret; coder->sequence = SEQ_ERROR; break; } // Calculate the memory usage of the filters / Block decoder. coder->mem_next_filters = lzma_raw_decoder_memusage( coder->filters); if (coder->mem_next_filters == UINT64_MAX) { // One or more unknown Filter IDs. coder->pending_error = LZMA_OPTIONS_ERROR; coder->sequence = SEQ_ERROR; break; } coder->sequence = SEQ_BLOCK_INIT; } // Fall through case SEQ_BLOCK_INIT: { // Check if decoding is possible at all with the current // memlimit_stop which we must never exceed. // // This needs to be the first thing in SEQ_BLOCK_INIT // to make it possible to restart decoding after increasing // memlimit_stop with lzma_memlimit_set(). if (coder->mem_next_filters > coder->memlimit_stop) { // Flush pending output before returning // LZMA_MEMLIMIT_ERROR. If the application doesn't // want to increase the limit, at least it will get // all the output possible so far. 
return_if_error(read_output_and_wait(coder, allocator, out, out_pos, out_size, NULL, true, &wait_abs, &has_blocked)); if (!lzma_outq_is_empty(&coder->outq)) return LZMA_OK; return LZMA_MEMLIMIT_ERROR; } // Check if the size information is available in Block Header. // If it is, check if the sizes are small enough that we don't // need to worry *too* much about integer overflows later in // the code. If these conditions are not met, we must use the // single-threaded direct mode. if (is_direct_mode_needed(coder->block_options.compressed_size) || is_direct_mode_needed( coder->block_options.uncompressed_size)) { coder->sequence = SEQ_BLOCK_DIRECT_INIT; break; } // Calculate the amount of memory needed for the input and // output buffers in threaded mode. // // These cannot overflow because we already checked that // the sizes are small enough using is_direct_mode_needed(). coder->mem_next_in = comp_blk_size(coder); const uint64_t mem_buffers = coder->mem_next_in + lzma_outq_outbuf_memusage( coder->block_options.uncompressed_size); // Add the amount needed by the filters. // Avoid integer overflows. if (UINT64_MAX - mem_buffers < coder->mem_next_filters) { // Use direct mode if the memusage would overflow. // This is a theoretical case that shouldn't happen // in practice unless the input file is weird (broken // or malicious). coder->sequence = SEQ_BLOCK_DIRECT_INIT; break; } // Amount of memory needed to decode this Block in // threaded mode: coder->mem_next_block = coder->mem_next_filters + mem_buffers; // If this alone would exceed memlimit_threading, then we must // use the single-threaded direct mode. if (coder->mem_next_block > coder->memlimit_threading) { coder->sequence = SEQ_BLOCK_DIRECT_INIT; break; } // Use the threaded mode. Free the direct mode decoder in // case it has been initialized. 
lzma_next_end(&coder->block_decoder, allocator); coder->mem_direct_mode = 0; // Since we already know what the sizes are supposed to be, // we can already add them to the Index hash. The Block // decoder will verify the values while decoding. const lzma_ret ret = lzma_index_hash_append(coder->index_hash, lzma_block_unpadded_size( &coder->block_options), coder->block_options.uncompressed_size); if (ret != LZMA_OK) { coder->pending_error = ret; coder->sequence = SEQ_ERROR; break; } coder->sequence = SEQ_BLOCK_THR_INIT; } // Fall through case SEQ_BLOCK_THR_INIT: { // We need to wait for a multiple conditions to become true // until we can initialize the Block decoder and let a worker // thread decode it: // // - Wait for the memory usage of the active threads to drop // so that starting the decoding of this Block won't make // us go over memlimit_threading. // // - Wait for at least one free output queue slot. // // - Wait for a free worker thread. // // While we wait, we must copy decompressed data to the out // buffer and catch possible decoder errors. // // read_output_and_wait() does all the above. bool block_can_start = false; return_if_error(read_output_and_wait(coder, allocator, out, out_pos, out_size, &block_can_start, true, &wait_abs, &has_blocked)); if (coder->pending_error != LZMA_OK) { coder->sequence = SEQ_ERROR; break; } if (!block_can_start) { // It's not a timeout because return_if_error handles // it already. Output queue cannot be empty either // because in that case block_can_start would have // been true. Thus the output buffer must be full and // the queue isn't empty. assert(*out_pos == out_size); assert(!lzma_outq_is_empty(&coder->outq)); return LZMA_OK; } // We know that we can start decoding this Block without // exceeding memlimit_threading. However, to stay below // memlimit_threading may require freeing some of the // cached memory. // // Get a local copy of variables that require locking the // mutex. 
It is fine if the worker threads modify the real // values after we read these as those changes can only be // towards more favorable conditions (less memory in use, // more in cache). // - // These are initalized to silence warnings. + // These are initialized to silence warnings. uint64_t mem_in_use = 0; uint64_t mem_cached = 0; struct worker_thread *thr = NULL; mythread_sync(coder->mutex) { mem_in_use = coder->mem_in_use; mem_cached = coder->mem_cached; thr = coder->threads_free; } // The maximum amount of memory that can be held by other // threads and cached buffers while allowing us to start // decoding the next Block. const uint64_t mem_max = coder->memlimit_threading - coder->mem_next_block; // If the existing allocations are so large that starting // to decode this Block might exceed memlimit_threads, // try to free memory from the output queue cache first. // // NOTE: This math assumes the worst case. It's possible // that the limit wouldn't be exceeded if the existing cached // allocations are reused. if (mem_in_use + mem_cached + coder->outq.mem_allocated > mem_max) { // Clear the outq cache except leave one buffer in // the cache if its size is correct. That way we // don't free and almost immediately reallocate // an identical buffer. lzma_outq_clear_cache2(&coder->outq, allocator, coder->block_options.uncompressed_size); } // If there is at least one worker_thread in the cache and // the existing allocations are so large that starting to // decode this Block might exceed memlimit_threads, free // memory by freeing cached Block decoders. // // NOTE: The comparison is different here than above. // Here we don't care about cached buffers in outq anymore // and only look at memory actually in use. This is because // if there is something in outq cache, it's a single buffer // that can be used as is. We ensured this in the above // if-block. 
uint64_t mem_freed = 0; if (thr != NULL && mem_in_use + mem_cached + coder->outq.mem_in_use > mem_max) { // Don't free the first Block decoder if its memory // usage isn't greater than what this Block will need. // Typically the same filter chain is used for all // Blocks so this way the allocations can be reused // when get_thread() picks the first worker_thread // from the cache. if (thr->mem_filters <= coder->mem_next_filters) thr = thr->next; while (thr != NULL) { lzma_next_end(&thr->block_decoder, allocator); mem_freed += thr->mem_filters; thr->mem_filters = 0; thr = thr->next; } } // Update the memory usage counters. Note that coder->mem_* - // may have changed since we read them so we must substract + // may have changed since we read them so we must subtract // or add the changes. mythread_sync(coder->mutex) { coder->mem_cached -= mem_freed; // Memory needed for the filters and the input buffer. // The output queue takes care of its own counter so // we don't touch it here. // // NOTE: After this, coder->mem_in_use + // coder->mem_cached might count the same thing twice. // If so, this will get corrected in get_thread() when // a worker_thread is picked from coder->free_threads - // and its memory usage is substracted from mem_cached. + // and its memory usage is subtracted from mem_cached. coder->mem_in_use += coder->mem_next_in + coder->mem_next_filters; } // Allocate memory for the output buffer in the output queue. lzma_ret ret = lzma_outq_prealloc_buf( &coder->outq, allocator, coder->block_options.uncompressed_size); if (ret != LZMA_OK) { threads_stop(coder); return ret; } // Set up coder->thr. ret = get_thread(coder, allocator); if (ret != LZMA_OK) { threads_stop(coder); return ret; } // The new Block decoder memory usage is already counted in // coder->mem_in_use. Store it in the thread too. coder->thr->mem_filters = coder->mem_next_filters; // Initialize the Block decoder. 
coder->thr->block_options = coder->block_options; ret = lzma_block_decoder_init( &coder->thr->block_decoder, allocator, &coder->thr->block_options); // Free the allocated filter options since they are needed // only to initialize the Block decoder. lzma_filters_free(coder->filters, allocator); coder->thr->block_options.filters = NULL; // Check if memory usage calculation and Block encoder // initialization succeeded. if (ret != LZMA_OK) { coder->pending_error = ret; coder->sequence = SEQ_ERROR; break; } // Allocate the input buffer. coder->thr->in_size = coder->mem_next_in; coder->thr->in = lzma_alloc(coder->thr->in_size, allocator); if (coder->thr->in == NULL) { threads_stop(coder); return LZMA_MEM_ERROR; } // Get the preallocated output buffer. coder->thr->outbuf = lzma_outq_get_buf( &coder->outq, coder->thr); // Start the decoder. mythread_sync(coder->thr->mutex) { assert(coder->thr->state == THR_IDLE); coder->thr->state = THR_RUN; mythread_cond_signal(&coder->thr->cond); } // Enable output from the thread that holds the oldest output // buffer in the output queue (if such a thread exists). mythread_sync(coder->mutex) { lzma_outq_enable_partial_output(&coder->outq, &worker_enable_partial_update); } coder->sequence = SEQ_BLOCK_THR_RUN; } // Fall through case SEQ_BLOCK_THR_RUN: { if (action == LZMA_FINISH && coder->fail_fast) { // We know that we won't get more input and that // the caller wants fail-fast behavior. If we see // that we don't have enough input to finish this // Block, return LZMA_DATA_ERROR immediately. // See SEQ_BLOCK_HEADER for the error code rationale. const size_t in_avail = in_size - *in_pos; const size_t in_needed = coder->thr->in_size - coder->thr->in_filled; if (in_avail < in_needed) { threads_stop(coder); return LZMA_DATA_ERROR; } } // Copy input to the worker thread. size_t cur_in_filled = coder->thr->in_filled; lzma_bufcpy(in, in_pos, in_size, coder->thr->in, &cur_in_filled, coder->thr->in_size); // Tell the thread how much we copied. 
mythread_sync(coder->thr->mutex) { coder->thr->in_filled = cur_in_filled; // NOTE: Most of the time we are copying input faster // than the thread can decode so most of the time // calling mythread_cond_signal() is useless but // we cannot make it conditional because thr->in_pos // is updated without a mutex. And the overhead should // be very much negligible anyway. mythread_cond_signal(&coder->thr->cond); } // Read output from the output queue. Just like in // SEQ_BLOCK_HEADER, we wait to fill the output buffer // only if waiting_allowed was set to true in the beginning // of this function (see the comment there). return_if_error(read_output_and_wait(coder, allocator, out, out_pos, out_size, NULL, waiting_allowed, &wait_abs, &has_blocked)); if (coder->pending_error != LZMA_OK) { coder->sequence = SEQ_ERROR; break; } // Return if the input didn't contain the whole Block. if (coder->thr->in_filled < coder->thr->in_size) { assert(*in_pos == in_size); return LZMA_OK; } // The whole Block has been copied to the thread-specific // buffer. Continue from the next Block Header or Index. coder->thr = NULL; coder->sequence = SEQ_BLOCK_HEADER; break; } case SEQ_BLOCK_DIRECT_INIT: { // Wait for the threads to finish and that all decoded data // has been copied to the output. That is, wait until the // output queue becomes empty. // // NOTE: No need to check for coder->pending_error as // we aren't consuming any input until the queue is empty // and if there is a pending error, read_output_and_wait() // will eventually return it before the queue is empty. return_if_error(read_output_and_wait(coder, allocator, out, out_pos, out_size, NULL, true, &wait_abs, &has_blocked)); if (!lzma_outq_is_empty(&coder->outq)) return LZMA_OK; // Free the cached output buffers. lzma_outq_clear_cache(&coder->outq, allocator); // Get rid of the worker threads, including the coder->threads // array. threads_end(coder, allocator); // Initialize the Block decoder. 
const lzma_ret ret = lzma_block_decoder_init( &coder->block_decoder, allocator, &coder->block_options); // Free the allocated filter options since they are needed // only to initialize the Block decoder. lzma_filters_free(coder->filters, allocator); coder->block_options.filters = NULL; // Check if Block decoder initialization succeeded. if (ret != LZMA_OK) return ret; // Make the memory usage visible to _memconfig(). coder->mem_direct_mode = coder->mem_next_filters; coder->sequence = SEQ_BLOCK_DIRECT_RUN; } // Fall through case SEQ_BLOCK_DIRECT_RUN: { const size_t in_old = *in_pos; const size_t out_old = *out_pos; const lzma_ret ret = coder->block_decoder.code( coder->block_decoder.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); coder->progress_in += *in_pos - in_old; coder->progress_out += *out_pos - out_old; if (ret != LZMA_STREAM_END) return ret; // Block decoded successfully. Add the new size pair to // the Index hash. return_if_error(lzma_index_hash_append(coder->index_hash, lzma_block_unpadded_size( &coder->block_options), coder->block_options.uncompressed_size)); coder->sequence = SEQ_BLOCK_HEADER; break; } case SEQ_INDEX_WAIT_OUTPUT: // Flush the output from all worker threads so that we can // decode the Index without thinking about threading. return_if_error(read_output_and_wait(coder, allocator, out, out_pos, out_size, NULL, true, &wait_abs, &has_blocked)); if (!lzma_outq_is_empty(&coder->outq)) return LZMA_OK; coder->sequence = SEQ_INDEX_DECODE; // Fall through case SEQ_INDEX_DECODE: { // If we don't have any input, don't call // lzma_index_hash_decode() since it would return // LZMA_BUF_ERROR, which we must not do here. if (*in_pos >= in_size) return LZMA_OK; // Decode the Index and compare it to the hash calculated // from the sizes of the Blocks (if any). 
const size_t in_old = *in_pos; const lzma_ret ret = lzma_index_hash_decode(coder->index_hash, in, in_pos, in_size); coder->progress_in += *in_pos - in_old; if (ret != LZMA_STREAM_END) return ret; coder->sequence = SEQ_STREAM_FOOTER; } // Fall through case SEQ_STREAM_FOOTER: { // Copy the Stream Footer to the internal buffer. const size_t in_old = *in_pos; lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, LZMA_STREAM_HEADER_SIZE); coder->progress_in += *in_pos - in_old; // Return if we didn't get the whole Stream Footer yet. if (coder->pos < LZMA_STREAM_HEADER_SIZE) return LZMA_OK; coder->pos = 0; // Decode the Stream Footer. The decoder gives // LZMA_FORMAT_ERROR if the magic bytes don't match, // so convert that return code to LZMA_DATA_ERROR. lzma_stream_flags footer_flags; const lzma_ret ret = lzma_stream_footer_decode( &footer_flags, coder->buffer); if (ret != LZMA_OK) return ret == LZMA_FORMAT_ERROR ? LZMA_DATA_ERROR : ret; // Check that Index Size stored in the Stream Footer matches // the real size of the Index field. if (lzma_index_hash_size(coder->index_hash) != footer_flags.backward_size) return LZMA_DATA_ERROR; // Compare that the Stream Flags fields are identical in // both Stream Header and Stream Footer. return_if_error(lzma_stream_flags_compare( &coder->stream_flags, &footer_flags)); if (!coder->concatenated) return LZMA_STREAM_END; coder->sequence = SEQ_STREAM_PADDING; } // Fall through case SEQ_STREAM_PADDING: assert(coder->concatenated); // Skip over possible Stream Padding. while (true) { if (*in_pos >= in_size) { // Unless LZMA_FINISH was used, we cannot // know if there's more input coming later. if (action != LZMA_FINISH) return LZMA_OK; // Stream Padding must be a multiple of // four bytes. return coder->pos == 0 ? LZMA_STREAM_END : LZMA_DATA_ERROR; } // If the byte is not zero, it probably indicates // beginning of a new Stream (or the file is corrupt). 
if (in[*in_pos] != 0x00) break; ++*in_pos; ++coder->progress_in; coder->pos = (coder->pos + 1) & 3; } // Stream Padding must be a multiple of four bytes (empty // Stream Padding is OK). if (coder->pos != 0) { ++*in_pos; ++coder->progress_in; return LZMA_DATA_ERROR; } // Prepare to decode the next Stream. return_if_error(stream_decoder_reset(coder, allocator)); break; case SEQ_ERROR: if (!coder->fail_fast) { // Let the application get all data before the point // where the error was detected. This matches the // behavior of single-threaded use. // // FIXME? Some errors (LZMA_MEM_ERROR) don't get here, // they are returned immediately. Thus in rare cases // the output will be less than in the single-threaded // mode. Maybe this doesn't matter much in practice. return_if_error(read_output_and_wait(coder, allocator, out, out_pos, out_size, NULL, true, &wait_abs, &has_blocked)); // We get here only if the error happened in the main // thread, for example, unsupported Block Header. if (!lzma_outq_is_empty(&coder->outq)) return LZMA_OK; } // We only get here if no errors were detected by the worker // threads. Errors from worker threads would have already been // returned by the call to read_output_and_wait() above. 
return coder->pending_error; default: assert(0); return LZMA_PROG_ERROR; } // Never reached } static void stream_decoder_mt_end(void *coder_ptr, const lzma_allocator *allocator) { struct lzma_stream_coder *coder = coder_ptr; threads_end(coder, allocator); lzma_outq_end(&coder->outq, allocator); lzma_next_end(&coder->block_decoder, allocator); lzma_filters_free(coder->filters, allocator); lzma_index_hash_end(coder->index_hash, allocator); lzma_free(coder, allocator); return; } static lzma_check stream_decoder_mt_get_check(const void *coder_ptr) { const struct lzma_stream_coder *coder = coder_ptr; return coder->stream_flags.check; } static lzma_ret stream_decoder_mt_memconfig(void *coder_ptr, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit) { // NOTE: This function gets/sets memlimit_stop. For now, // memlimit_threading cannot be modified after initialization. // // *memusage will include cached memory too. Excluding cached memory // would be misleading and it wouldn't help the applications to // know how much memory is actually needed to decompress the file // because the higher the number of threads and the memlimits are // the more memory the decoder may use. // // Setting a new limit includes the cached memory too and too low // limits will be rejected. Alternative could be to free the cached // memory immediately if that helps to bring the limit down but // the current way is the simplest. It's unlikely that limit needs // to be lowered in the middle of a file anyway; the typical reason // to want a new limit is to increase after LZMA_MEMLIMIT_ERROR // and even such use isn't common. struct lzma_stream_coder *coder = coder_ptr; mythread_sync(coder->mutex) { *memusage = coder->mem_direct_mode + coder->mem_in_use + coder->mem_cached + coder->outq.mem_allocated; } // If no filter chains are allocated, *memusage may be zero. // Always return at least LZMA_MEMUSAGE_BASE. 
if (*memusage < LZMA_MEMUSAGE_BASE) *memusage = LZMA_MEMUSAGE_BASE; *old_memlimit = coder->memlimit_stop; if (new_memlimit != 0) { if (new_memlimit < *memusage) return LZMA_MEMLIMIT_ERROR; coder->memlimit_stop = new_memlimit; } return LZMA_OK; } static void stream_decoder_mt_get_progress(void *coder_ptr, uint64_t *progress_in, uint64_t *progress_out) { struct lzma_stream_coder *coder = coder_ptr; // Lock coder->mutex to prevent finishing threads from moving their // progress info from the worker_thread structure to lzma_stream_coder. mythread_sync(coder->mutex) { *progress_in = coder->progress_in; *progress_out = coder->progress_out; for (size_t i = 0; i < coder->threads_initialized; ++i) { mythread_sync(coder->threads[i].mutex) { *progress_in += coder->threads[i].progress_in; *progress_out += coder->threads[i] .progress_out; } } } return; } static lzma_ret stream_decoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_mt *options) { struct lzma_stream_coder *coder; if (options->threads == 0 || options->threads > LZMA_THREADS_MAX) return LZMA_OPTIONS_ERROR; if (options->flags & ~LZMA_SUPPORTED_FLAGS) return LZMA_OPTIONS_ERROR; lzma_next_coder_init(&stream_decoder_mt_init, next, allocator); coder = next->coder; if (!coder) { coder = lzma_alloc(sizeof(struct lzma_stream_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; next->coder = coder; if (mythread_mutex_init(&coder->mutex)) { lzma_free(coder, allocator); return LZMA_MEM_ERROR; } if (mythread_cond_init(&coder->cond)) { mythread_mutex_destroy(&coder->mutex); lzma_free(coder, allocator); return LZMA_MEM_ERROR; } next->code = &stream_decode_mt; next->end = &stream_decoder_mt_end; next->get_check = &stream_decoder_mt_get_check; next->memconfig = &stream_decoder_mt_memconfig; next->get_progress = &stream_decoder_mt_get_progress; coder->filters[0].id = LZMA_VLI_UNKNOWN; memzero(&coder->outq, sizeof(coder->outq)); coder->block_decoder = LZMA_NEXT_CODER_INIT; coder->mem_direct_mode 
= 0; coder->index_hash = NULL; coder->threads = NULL; coder->threads_free = NULL; coder->threads_initialized = 0; } // Cleanup old filter chain if one remains after unfinished decoding // of a previous Stream. lzma_filters_free(coder->filters, allocator); // By allocating threads from scratch we can start memory-usage // accounting from scratch, too. Changes in filter and block sizes may // affect number of threads. // // FIXME? Reusing should be easy but unlike the single-threaded // decoder, with some types of input file combinations reusing // could leave quite a lot of memory allocated but unused (first // file could allocate a lot, the next files could use fewer // threads and some of the allocations from the first file would not // get freed unless memlimit_threading forces us to clear caches). // // NOTE: The direct mode decoder isn't freed here if one exists. // It will be reused or freed as needed in the main loop. threads_end(coder, allocator); // All memusage counters start at 0 (including mem_direct_mode). // The little extra that is needed for the structs in this file // get accounted well enough by the filter chain memory usage // which adds LZMA_MEMUSAGE_BASE for each chain. However, // stream_decoder_mt_memconfig() has to handle this specially so that // it will never return less than LZMA_MEMUSAGE_BASE as memory usage. 
coder->mem_in_use = 0; coder->mem_cached = 0; coder->mem_next_block = 0; coder->progress_in = 0; coder->progress_out = 0; coder->sequence = SEQ_STREAM_HEADER; coder->thread_error = LZMA_OK; coder->pending_error = LZMA_OK; coder->thr = NULL; coder->timeout = options->timeout; coder->memlimit_threading = my_max(1, options->memlimit_threading); coder->memlimit_stop = my_max(1, options->memlimit_stop); if (coder->memlimit_threading > coder->memlimit_stop) coder->memlimit_threading = coder->memlimit_stop; coder->tell_no_check = (options->flags & LZMA_TELL_NO_CHECK) != 0; coder->tell_unsupported_check = (options->flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0; coder->tell_any_check = (options->flags & LZMA_TELL_ANY_CHECK) != 0; coder->ignore_check = (options->flags & LZMA_IGNORE_CHECK) != 0; coder->concatenated = (options->flags & LZMA_CONCATENATED) != 0; coder->fail_fast = (options->flags & LZMA_FAIL_FAST) != 0; coder->first_stream = true; coder->out_was_filled = false; coder->pos = 0; coder->threads_max = options->threads; return_if_error(lzma_outq_init(&coder->outq, allocator, coder->threads_max)); return stream_decoder_reset(coder, allocator); } extern LZMA_API(lzma_ret) lzma_stream_decoder_mt(lzma_stream *strm, const lzma_mt *options) { lzma_next_strm_init(stream_decoder_mt_init, strm, options); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } diff --git a/contrib/xz/src/liblzma/common/stream_encoder_mt.c b/contrib/xz/src/liblzma/common/stream_encoder_mt.c index 5990742b6b7d..f64de9bdbc57 100644 --- a/contrib/xz/src/liblzma/common/stream_encoder_mt.c +++ b/contrib/xz/src/liblzma/common/stream_encoder_mt.c @@ -1,1283 +1,1283 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file stream_encoder_mt.c /// \brief Multithreaded .xz Stream encoder // // Author: Lasse Collin // // This file has been put into the public domain. 
// You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "filter_encoder.h" #include "easy_preset.h" #include "block_encoder.h" #include "block_buffer_encoder.h" #include "index_encoder.h" #include "outqueue.h" /// Maximum supported block size. This makes it simpler to prevent integer /// overflows if we are given unusually large block size. #define BLOCK_SIZE_MAX (UINT64_MAX / LZMA_THREADS_MAX) typedef enum { /// Waiting for work. THR_IDLE, /// Encoding is in progress. THR_RUN, /// Encoding is in progress but no more input data will /// be read. THR_FINISH, /// The main thread wants the thread to stop whatever it was doing /// but not exit. THR_STOP, /// The main thread wants the thread to exit. We could use /// cancellation but since there's stopped anyway, this is lazier. THR_EXIT, } worker_state; typedef struct lzma_stream_coder_s lzma_stream_coder; typedef struct worker_thread_s worker_thread; struct worker_thread_s { worker_state state; /// Input buffer of coder->block_size bytes. The main thread will /// put new input into this and update in_size accordingly. Once /// no more input is coming, state will be set to THR_FINISH. uint8_t *in; /// Amount of data available in the input buffer. This is modified /// only by the main thread. size_t in_size; /// Output buffer for this thread. This is set by the main /// thread every time a new Block is started with this thread /// structure. lzma_outbuf *outbuf; /// Pointer to the main structure is needed when putting this /// thread back to the stack of free threads. lzma_stream_coder *coder; /// The allocator is set by the main thread. Since a copy of the /// pointer is kept here, the application must not change the /// allocator before calling lzma_end(). const lzma_allocator *allocator; /// Amount of uncompressed data that has already been compressed. uint64_t progress_in; /// Amount of compressed data that is ready. 
uint64_t progress_out; /// Block encoder lzma_next_coder block_encoder; /// Compression options for this Block lzma_block block_options; /// Filter chain for this thread. By copying the filters array /// to each thread it is possible to change the filter chain /// between Blocks using lzma_filters_update(). lzma_filter filters[LZMA_FILTERS_MAX + 1]; /// Next structure in the stack of free worker threads. worker_thread *next; mythread_mutex mutex; mythread_cond cond; /// The ID of this thread is used to join the thread /// when it's not needed anymore. mythread thread_id; }; struct lzma_stream_coder_s { enum { SEQ_STREAM_HEADER, SEQ_BLOCK, SEQ_INDEX, SEQ_STREAM_FOOTER, } sequence; /// Start a new Block every block_size bytes of input unless /// LZMA_FULL_FLUSH or LZMA_FULL_BARRIER is used earlier. size_t block_size; /// The filter chain to use for the next Block. /// This can be updated using lzma_filters_update() /// after LZMA_FULL_BARRIER or LZMA_FULL_FLUSH. lzma_filter filters[LZMA_FILTERS_MAX + 1]; /// A copy of filters[] will be put here when attempting to get /// a new worker thread. This will be copied to a worker thread /// when a thread becomes free and then this cache is marked as /// empty by setting [0].id = LZMA_VLI_UNKNOWN. Without this cache /// the filter options from filters[] would get uselessly copied /// multiple times (allocated and freed) when waiting for a new free /// worker thread. /// /// This is freed if filters[] is updated via lzma_filters_update(). lzma_filter filters_cache[LZMA_FILTERS_MAX + 1]; /// Index to hold sizes of the Blocks lzma_index *index; /// Index encoder lzma_next_coder index_encoder; /// Stream Flags for encoding the Stream Header and Stream Footer. lzma_stream_flags stream_flags; /// Buffer to hold Stream Header and Stream Footer. 
uint8_t header[LZMA_STREAM_HEADER_SIZE]; /// Read position in header[] size_t header_pos; /// Output buffer queue for compressed data lzma_outq outq; /// How much memory to allocate for each lzma_outbuf.buf size_t outbuf_alloc_size; /// Maximum wait time if cannot use all the input and cannot /// fill the output buffer. This is in milliseconds. uint32_t timeout; /// Error code from a worker thread lzma_ret thread_error; /// Array of allocated thread-specific structures worker_thread *threads; /// Number of structures in "threads" above. This is also the /// number of threads that will be created at maximum. uint32_t threads_max; /// Number of thread structures that have been initialized, and /// thus the number of worker threads actually created so far. uint32_t threads_initialized; /// Stack of free threads. When a thread finishes, it puts itself /// back into this stack. This starts as empty because threads /// are created only when actually needed. worker_thread *threads_free; /// The most recent worker thread to which the main thread writes /// the new input from the application. worker_thread *thr; /// Amount of uncompressed data in Blocks that have already /// been finished. uint64_t progress_in; /// Amount of compressed data in Stream Header + Blocks that /// have already been finished. uint64_t progress_out; mythread_mutex mutex; mythread_cond cond; }; /// Tell the main thread that something has gone wrong. static void worker_error(worker_thread *thr, lzma_ret ret) { assert(ret != LZMA_OK); assert(ret != LZMA_STREAM_END); mythread_sync(thr->coder->mutex) { if (thr->coder->thread_error == LZMA_OK) thr->coder->thread_error = ret; mythread_cond_signal(&thr->coder->cond); } return; } static worker_state worker_encode(worker_thread *thr, size_t *out_pos, worker_state state) { assert(thr->progress_in == 0); assert(thr->progress_out == 0); // Set the Block options. 
thr->block_options = (lzma_block){ .version = 0, .check = thr->coder->stream_flags.check, .compressed_size = thr->outbuf->allocated, .uncompressed_size = thr->coder->block_size, .filters = thr->filters, }; // Calculate maximum size of the Block Header. This amount is // reserved in the beginning of the buffer so that Block Header // along with Compressed Size and Uncompressed Size can be // written there. lzma_ret ret = lzma_block_header_size(&thr->block_options); if (ret != LZMA_OK) { worker_error(thr, ret); return THR_STOP; } // Initialize the Block encoder. ret = lzma_block_encoder_init(&thr->block_encoder, thr->allocator, &thr->block_options); if (ret != LZMA_OK) { worker_error(thr, ret); return THR_STOP; } size_t in_pos = 0; size_t in_size = 0; *out_pos = thr->block_options.header_size; const size_t out_size = thr->outbuf->allocated; do { mythread_sync(thr->mutex) { // Store in_pos and *out_pos into *thr so that // an application may read them via // lzma_get_progress() to get progress information. // // NOTE: These aren't updated when the encoding // finishes. Instead, the final values are taken // later from thr->outbuf. thr->progress_in = in_pos; thr->progress_out = *out_pos; while (in_size == thr->in_size && thr->state == THR_RUN) mythread_cond_wait(&thr->cond, &thr->mutex); state = thr->state; in_size = thr->in_size; } // Return if we were asked to stop or exit. if (state >= THR_STOP) return state; lzma_action action = state == THR_FINISH ? LZMA_FINISH : LZMA_RUN; // Limit the amount of input given to the Block encoder // at once. This way this thread can react fairly quickly // if the main thread wants us to stop or exit. 
static const size_t in_chunk_max = 16384; size_t in_limit = in_size; if (in_size - in_pos > in_chunk_max) { in_limit = in_pos + in_chunk_max; action = LZMA_RUN; } ret = thr->block_encoder.code( thr->block_encoder.coder, thr->allocator, thr->in, &in_pos, in_limit, thr->outbuf->buf, out_pos, out_size, action); } while (ret == LZMA_OK && *out_pos < out_size); switch (ret) { case LZMA_STREAM_END: assert(state == THR_FINISH); // Encode the Block Header. By doing it after // the compression, we can store the Compressed Size // and Uncompressed Size fields. ret = lzma_block_header_encode(&thr->block_options, thr->outbuf->buf); if (ret != LZMA_OK) { worker_error(thr, ret); return THR_STOP; } break; case LZMA_OK: // The data was incompressible. Encode it using uncompressed // LZMA2 chunks. // // First wait that we have gotten all the input. mythread_sync(thr->mutex) { while (thr->state == THR_RUN) mythread_cond_wait(&thr->cond, &thr->mutex); state = thr->state; in_size = thr->in_size; } if (state >= THR_STOP) return state; // Do the encoding. This takes care of the Block Header too. *out_pos = 0; ret = lzma_block_uncomp_encode(&thr->block_options, thr->in, in_size, thr->outbuf->buf, out_pos, out_size); // It shouldn't fail. if (ret != LZMA_OK) { worker_error(thr, LZMA_PROG_ERROR); return THR_STOP; } break; default: worker_error(thr, ret); return THR_STOP; } // Set the size information that will be read by the main thread // to write the Index field. thr->outbuf->unpadded_size = lzma_block_unpadded_size(&thr->block_options); assert(thr->outbuf->unpadded_size != 0); thr->outbuf->uncompressed_size = thr->block_options.uncompressed_size; return THR_FINISH; } static MYTHREAD_RET_TYPE worker_start(void *thr_ptr) { worker_thread *thr = thr_ptr; worker_state state = THR_IDLE; // Init to silence a warning while (true) { // Wait for work. mythread_sync(thr->mutex) { while (true) { // The thread is already idle so if we are // requested to stop, just set the state. 
if (thr->state == THR_STOP) { thr->state = THR_IDLE; mythread_cond_signal(&thr->cond); } state = thr->state; if (state != THR_IDLE) break; mythread_cond_wait(&thr->cond, &thr->mutex); } } size_t out_pos = 0; assert(state != THR_IDLE); assert(state != THR_STOP); if (state <= THR_FINISH) state = worker_encode(thr, &out_pos, state); if (state == THR_EXIT) break; // Mark the thread as idle unless the main thread has // told us to exit. Signal is needed for the case // where the main thread is waiting for the threads to stop. mythread_sync(thr->mutex) { if (thr->state != THR_EXIT) { thr->state = THR_IDLE; mythread_cond_signal(&thr->cond); } } mythread_sync(thr->coder->mutex) { // If no errors occurred, make the encoded data // available to be copied out. if (state == THR_FINISH) { thr->outbuf->pos = out_pos; thr->outbuf->finished = true; } // Update the main progress info. thr->coder->progress_in += thr->outbuf->uncompressed_size; thr->coder->progress_out += out_pos; thr->progress_in = 0; thr->progress_out = 0; // Return this thread to the stack of free threads. thr->next = thr->coder->threads_free; thr->coder->threads_free = thr; mythread_cond_signal(&thr->coder->cond); } } // Exiting, free the resources. lzma_filters_free(thr->filters, thr->allocator); mythread_mutex_destroy(&thr->mutex); mythread_cond_destroy(&thr->cond); lzma_next_end(&thr->block_encoder, thr->allocator); lzma_free(thr->in, thr->allocator); return MYTHREAD_RET_VALUE; } /// Make the threads stop but not exit. Optionally wait for them to stop. static void threads_stop(lzma_stream_coder *coder, bool wait_for_threads) { // Tell the threads to stop. for (uint32_t i = 0; i < coder->threads_initialized; ++i) { mythread_sync(coder->threads[i].mutex) { coder->threads[i].state = THR_STOP; mythread_cond_signal(&coder->threads[i].cond); } } if (!wait_for_threads) return; // Wait for the threads to settle in the idle state. 
for (uint32_t i = 0; i < coder->threads_initialized; ++i) { mythread_sync(coder->threads[i].mutex) { while (coder->threads[i].state != THR_IDLE) mythread_cond_wait(&coder->threads[i].cond, &coder->threads[i].mutex); } } return; } /// Stop the threads and free the resources associated with them. /// Wait until the threads have exited. static void threads_end(lzma_stream_coder *coder, const lzma_allocator *allocator) { for (uint32_t i = 0; i < coder->threads_initialized; ++i) { mythread_sync(coder->threads[i].mutex) { coder->threads[i].state = THR_EXIT; mythread_cond_signal(&coder->threads[i].cond); } } for (uint32_t i = 0; i < coder->threads_initialized; ++i) { int ret = mythread_join(coder->threads[i].thread_id); assert(ret == 0); (void)ret; } lzma_free(coder->threads, allocator); return; } /// Initialize a new worker_thread structure and create a new thread. static lzma_ret initialize_new_thread(lzma_stream_coder *coder, const lzma_allocator *allocator) { worker_thread *thr = &coder->threads[coder->threads_initialized]; thr->in = lzma_alloc(coder->block_size, allocator); if (thr->in == NULL) return LZMA_MEM_ERROR; if (mythread_mutex_init(&thr->mutex)) goto error_mutex; if (mythread_cond_init(&thr->cond)) goto error_cond; thr->state = THR_IDLE; thr->allocator = allocator; thr->coder = coder; thr->progress_in = 0; thr->progress_out = 0; thr->block_encoder = LZMA_NEXT_CODER_INIT; thr->filters[0].id = LZMA_VLI_UNKNOWN; if (mythread_create(&thr->thread_id, &worker_start, thr)) goto error_thread; ++coder->threads_initialized; coder->thr = thr; return LZMA_OK; error_thread: mythread_cond_destroy(&thr->cond); error_cond: mythread_mutex_destroy(&thr->mutex); error_mutex: lzma_free(thr->in, allocator); return LZMA_MEM_ERROR; } static lzma_ret get_thread(lzma_stream_coder *coder, const lzma_allocator *allocator) { // If there are no free output subqueues, there is no // point to try getting a thread. 
if (!lzma_outq_has_buf(&coder->outq)) return LZMA_OK; // That's also true if we cannot allocate memory for the output // buffer in the output queue. return_if_error(lzma_outq_prealloc_buf(&coder->outq, allocator, coder->outbuf_alloc_size)); // Make a thread-specific copy of the filter chain. Put it in // the cache array first so that if we cannot get a new thread yet, // the allocation is ready when we try again. if (coder->filters_cache[0].id == LZMA_VLI_UNKNOWN) return_if_error(lzma_filters_copy( coder->filters, coder->filters_cache, allocator)); // If there is a free structure on the stack, use it. mythread_sync(coder->mutex) { if (coder->threads_free != NULL) { coder->thr = coder->threads_free; coder->threads_free = coder->threads_free->next; } } if (coder->thr == NULL) { // If there are no uninitialized structures left, return. if (coder->threads_initialized == coder->threads_max) return LZMA_OK; // Initialize a new thread. return_if_error(initialize_new_thread(coder, allocator)); } // Reset the parts of the thread state that have to be done // in the main thread. mythread_sync(coder->thr->mutex) { coder->thr->state = THR_RUN; coder->thr->in_size = 0; coder->thr->outbuf = lzma_outq_get_buf(&coder->outq, NULL); // Free the old thread-specific filter options and replace // them with the already-allocated new options from // coder->filters_cache[]. Then mark the cache as empty. lzma_filters_free(coder->thr->filters, allocator); memcpy(coder->thr->filters, coder->filters_cache, sizeof(coder->filters_cache)); coder->filters_cache[0].id = LZMA_VLI_UNKNOWN; mythread_cond_signal(&coder->thr->cond); } return LZMA_OK; } static lzma_ret stream_encode_in(lzma_stream_coder *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, lzma_action action) { while (*in_pos < in_size || (coder->thr != NULL && action != LZMA_RUN)) { if (coder->thr == NULL) { // Get a new thread. 
const lzma_ret ret = get_thread(coder, allocator); if (coder->thr == NULL) return ret; } // Copy the input data to thread's buffer. size_t thr_in_size = coder->thr->in_size; lzma_bufcpy(in, in_pos, in_size, coder->thr->in, &thr_in_size, coder->block_size); // Tell the Block encoder to finish if // - it has got block_size bytes of input; or // - all input was used and LZMA_FINISH, LZMA_FULL_FLUSH, // or LZMA_FULL_BARRIER was used. // // TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER. const bool finish = thr_in_size == coder->block_size || (*in_pos == in_size && action != LZMA_RUN); bool block_error = false; mythread_sync(coder->thr->mutex) { if (coder->thr->state == THR_IDLE) { // Something has gone wrong with the Block // encoder. It has set coder->thread_error // which we will read a few lines later. block_error = true; } else { // Tell the Block encoder its new amount // of input and update the state if needed. coder->thr->in_size = thr_in_size; if (finish) coder->thr->state = THR_FINISH; mythread_cond_signal(&coder->thr->cond); } } if (block_error) { lzma_ret ret = LZMA_OK; // Init to silence a warning. mythread_sync(coder->mutex) { ret = coder->thread_error; } return ret; } if (finish) coder->thr = NULL; } return LZMA_OK; } /// Wait until more input can be consumed, more output can be read, or /// an optional timeout is reached. static bool wait_for_work(lzma_stream_coder *coder, mythread_condtime *wait_abs, bool *has_blocked, bool has_input) { if (coder->timeout != 0 && !*has_blocked) { // Every time when stream_encode_mt() is called via // lzma_code(), *has_blocked starts as false. We set it // to true here and calculate the absolute time when // we must return if there's nothing to do. // // This way if we block multiple times for short moments // less than "timeout" milliseconds, we will return once // "timeout" amount of time has passed since the *first* // blocking occurred. 
If the absolute time was calculated // again every time we block, "timeout" would effectively // be meaningless if we never consecutively block longer // than "timeout" ms. *has_blocked = true; mythread_condtime_set(wait_abs, &coder->cond, coder->timeout); } bool timed_out = false; mythread_sync(coder->mutex) { // There are four things that we wait. If one of them // becomes possible, we return. // - If there is input left, we need to get a free // worker thread and an output buffer for it. // - Data ready to be read from the output queue. // - A worker thread indicates an error. // - Time out occurs. while ((!has_input || coder->threads_free == NULL || !lzma_outq_has_buf(&coder->outq)) && !lzma_outq_is_readable(&coder->outq) && coder->thread_error == LZMA_OK && !timed_out) { if (coder->timeout != 0) timed_out = mythread_cond_timedwait( &coder->cond, &coder->mutex, wait_abs) != 0; else mythread_cond_wait(&coder->cond, &coder->mutex); } } return timed_out; } static lzma_ret stream_encode_mt(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { lzma_stream_coder *coder = coder_ptr; switch (coder->sequence) { case SEQ_STREAM_HEADER: lzma_bufcpy(coder->header, &coder->header_pos, sizeof(coder->header), out, out_pos, out_size); if (coder->header_pos < sizeof(coder->header)) return LZMA_OK; coder->header_pos = 0; coder->sequence = SEQ_BLOCK; // Fall through case SEQ_BLOCK: { // Initialized to silence warnings. lzma_vli unpadded_size = 0; lzma_vli uncompressed_size = 0; lzma_ret ret = LZMA_OK; // These are for wait_for_work(). bool has_blocked = false; - mythread_condtime wait_abs; + mythread_condtime wait_abs = { 0 }; while (true) { mythread_sync(coder->mutex) { // Check for Block encoder errors. ret = coder->thread_error; if (ret != LZMA_OK) { assert(ret != LZMA_STREAM_END); break; // Break out of mythread_sync. 
} // Try to read compressed data to out[]. ret = lzma_outq_read(&coder->outq, allocator, out, out_pos, out_size, &unpadded_size, &uncompressed_size); } if (ret == LZMA_STREAM_END) { // End of Block. Add it to the Index. ret = lzma_index_append(coder->index, allocator, unpadded_size, uncompressed_size); if (ret != LZMA_OK) { threads_stop(coder, false); return ret; } // If we didn't fill the output buffer yet, // try to read more data. Maybe the next // outbuf has been finished already too. if (*out_pos < out_size) continue; } if (ret != LZMA_OK) { // coder->thread_error was set. threads_stop(coder, false); return ret; } // Try to give uncompressed data to a worker thread. ret = stream_encode_in(coder, allocator, in, in_pos, in_size, action); if (ret != LZMA_OK) { threads_stop(coder, false); return ret; } // See if we should wait or return. // // TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER. if (*in_pos == in_size) { // LZMA_RUN: More data is probably coming // so return to let the caller fill the // input buffer. if (action == LZMA_RUN) return LZMA_OK; // LZMA_FULL_BARRIER: The same as with // LZMA_RUN but tell the caller that the // barrier was completed. if (action == LZMA_FULL_BARRIER) return LZMA_STREAM_END; // Finishing or flushing isn't completed until // all input data has been encoded and copied // to the output buffer. if (lzma_outq_is_empty(&coder->outq)) { // LZMA_FINISH: Continue to encode // the Index field. if (action == LZMA_FINISH) break; // LZMA_FULL_FLUSH: Return to tell // the caller that flushing was // completed. if (action == LZMA_FULL_FLUSH) return LZMA_STREAM_END; } } // Return if there is no output space left. // This check must be done after testing the input // buffer, because we might want to use a different // return code. if (*out_pos == out_size) return LZMA_OK; // Neither in nor out has been used completely. // Wait until there's something we can do. 
if (wait_for_work(coder, &wait_abs, &has_blocked, *in_pos < in_size)) return LZMA_TIMED_OUT; } // All Blocks have been encoded and the threads have stopped. // Prepare to encode the Index field. return_if_error(lzma_index_encoder_init( &coder->index_encoder, allocator, coder->index)); coder->sequence = SEQ_INDEX; // Update the progress info to take the Index and // Stream Footer into account. Those are very fast to encode // so in terms of progress information they can be thought // to be ready to be copied out. coder->progress_out += lzma_index_size(coder->index) + LZMA_STREAM_HEADER_SIZE; } // Fall through case SEQ_INDEX: { // Call the Index encoder. It doesn't take any input, so // those pointers can be NULL. const lzma_ret ret = coder->index_encoder.code( coder->index_encoder.coder, allocator, NULL, NULL, 0, out, out_pos, out_size, LZMA_RUN); if (ret != LZMA_STREAM_END) return ret; // Encode the Stream Footer into coder->buffer. coder->stream_flags.backward_size = lzma_index_size(coder->index); if (lzma_stream_footer_encode(&coder->stream_flags, coder->header) != LZMA_OK) return LZMA_PROG_ERROR; coder->sequence = SEQ_STREAM_FOOTER; } // Fall through case SEQ_STREAM_FOOTER: lzma_bufcpy(coder->header, &coder->header_pos, sizeof(coder->header), out, out_pos, out_size); return coder->header_pos < sizeof(coder->header) ? LZMA_OK : LZMA_STREAM_END; } assert(0); return LZMA_PROG_ERROR; } static void stream_encoder_mt_end(void *coder_ptr, const lzma_allocator *allocator) { lzma_stream_coder *coder = coder_ptr; // Threads must be killed before the output queue can be freed. 
threads_end(coder, allocator); lzma_outq_end(&coder->outq, allocator); lzma_filters_free(coder->filters, allocator); lzma_filters_free(coder->filters_cache, allocator); lzma_next_end(&coder->index_encoder, allocator); lzma_index_end(coder->index, allocator); mythread_cond_destroy(&coder->cond); mythread_mutex_destroy(&coder->mutex); lzma_free(coder, allocator); return; } static lzma_ret stream_encoder_mt_update(void *coder_ptr, const lzma_allocator *allocator, const lzma_filter *filters, const lzma_filter *reversed_filters lzma_attribute((__unused__))) { lzma_stream_coder *coder = coder_ptr; // Applications shouldn't attempt to change the options when // we are already encoding the Index or Stream Footer. if (coder->sequence > SEQ_BLOCK) return LZMA_PROG_ERROR; // For now the threaded encoder doesn't support changing // the options in the middle of a Block. if (coder->thr != NULL) return LZMA_PROG_ERROR; // Check if the filter chain seems mostly valid. See the comment // in stream_encoder_mt_init(). if (lzma_raw_encoder_memusage(filters) == UINT64_MAX) return LZMA_OPTIONS_ERROR; // Make a copy to a temporary buffer first. This way the encoder // state stays unchanged if an error occurs in lzma_filters_copy(). lzma_filter temp[LZMA_FILTERS_MAX + 1]; return_if_error(lzma_filters_copy(filters, temp, allocator)); // Free the options of the old chain as well as the cache. lzma_filters_free(coder->filters, allocator); lzma_filters_free(coder->filters_cache, allocator); // Copy the new filter chain in place. memcpy(coder->filters, temp, sizeof(temp)); return LZMA_OK; } /// Options handling for lzma_stream_encoder_mt_init() and /// lzma_stream_encoder_mt_memusage() static lzma_ret get_options(const lzma_mt *options, lzma_options_easy *opt_easy, const lzma_filter **filters, uint64_t *block_size, uint64_t *outbuf_size_max) { // Validate some of the options. 
if (options == NULL) return LZMA_PROG_ERROR; if (options->flags != 0 || options->threads == 0 || options->threads > LZMA_THREADS_MAX) return LZMA_OPTIONS_ERROR; if (options->filters != NULL) { // Filter chain was given, use it as is. *filters = options->filters; } else { // Use a preset. if (lzma_easy_preset(opt_easy, options->preset)) return LZMA_OPTIONS_ERROR; *filters = opt_easy->filters; } // Block size if (options->block_size > 0) { if (options->block_size > BLOCK_SIZE_MAX) return LZMA_OPTIONS_ERROR; *block_size = options->block_size; } else { // Determine the Block size from the filter chain. *block_size = lzma_mt_block_size(*filters); if (*block_size == 0) return LZMA_OPTIONS_ERROR; assert(*block_size <= BLOCK_SIZE_MAX); } // Calculate the maximum amount output that a single output buffer // may need to hold. This is the same as the maximum total size of // a Block. *outbuf_size_max = lzma_block_buffer_bound64(*block_size); if (*outbuf_size_max == 0) return LZMA_MEM_ERROR; return LZMA_OK; } static void get_progress(void *coder_ptr, uint64_t *progress_in, uint64_t *progress_out) { lzma_stream_coder *coder = coder_ptr; // Lock coder->mutex to prevent finishing threads from moving their // progress info from the worker_thread structure to lzma_stream_coder. mythread_sync(coder->mutex) { *progress_in = coder->progress_in; *progress_out = coder->progress_out; for (size_t i = 0; i < coder->threads_initialized; ++i) { mythread_sync(coder->threads[i].mutex) { *progress_in += coder->threads[i].progress_in; *progress_out += coder->threads[i] .progress_out; } } } return; } static lzma_ret stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_mt *options) { lzma_next_coder_init(&stream_encoder_mt_init, next, allocator); // Get the filter chain. 
lzma_options_easy easy; const lzma_filter *filters; uint64_t block_size; uint64_t outbuf_size_max; return_if_error(get_options(options, &easy, &filters, &block_size, &outbuf_size_max)); #if SIZE_MAX < UINT64_MAX if (block_size > SIZE_MAX || outbuf_size_max > SIZE_MAX) return LZMA_MEM_ERROR; #endif // Validate the filter chain so that we can give an error in this // function instead of delaying it to the first call to lzma_code(). // The memory usage calculation verifies the filter chain as // a side effect so we take advantage of that. It's not a perfect // check though as raw encoder allows LZMA1 too but such problems // will be caught eventually with Block Header encoder. if (lzma_raw_encoder_memusage(filters) == UINT64_MAX) return LZMA_OPTIONS_ERROR; // Validate the Check ID. if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX) return LZMA_PROG_ERROR; if (!lzma_check_is_supported(options->check)) return LZMA_UNSUPPORTED_CHECK; // Allocate and initialize the base structure if needed. lzma_stream_coder *coder = next->coder; if (coder == NULL) { coder = lzma_alloc(sizeof(lzma_stream_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; next->coder = coder; // For the mutex and condition variable initializations // the error handling has to be done here because // stream_encoder_mt_end() doesn't know if they have // already been initialized or not. 
if (mythread_mutex_init(&coder->mutex)) { lzma_free(coder, allocator); next->coder = NULL; return LZMA_MEM_ERROR; } if (mythread_cond_init(&coder->cond)) { mythread_mutex_destroy(&coder->mutex); lzma_free(coder, allocator); next->coder = NULL; return LZMA_MEM_ERROR; } next->code = &stream_encode_mt; next->end = &stream_encoder_mt_end; next->get_progress = &get_progress; next->update = &stream_encoder_mt_update; coder->filters[0].id = LZMA_VLI_UNKNOWN; coder->filters_cache[0].id = LZMA_VLI_UNKNOWN; coder->index_encoder = LZMA_NEXT_CODER_INIT; coder->index = NULL; memzero(&coder->outq, sizeof(coder->outq)); coder->threads = NULL; coder->threads_max = 0; coder->threads_initialized = 0; } // Basic initializations coder->sequence = SEQ_STREAM_HEADER; coder->block_size = (size_t)(block_size); coder->outbuf_alloc_size = (size_t)(outbuf_size_max); coder->thread_error = LZMA_OK; coder->thr = NULL; // Allocate the thread-specific base structures. assert(options->threads > 0); if (coder->threads_max != options->threads) { threads_end(coder, allocator); coder->threads = NULL; coder->threads_max = 0; coder->threads_initialized = 0; coder->threads_free = NULL; coder->threads = lzma_alloc( options->threads * sizeof(worker_thread), allocator); if (coder->threads == NULL) return LZMA_MEM_ERROR; coder->threads_max = options->threads; } else { // Reuse the old structures and threads. Tell the running // threads to stop and wait until they have stopped. threads_stop(coder, true); } // Output queue return_if_error(lzma_outq_init(&coder->outq, allocator, options->threads)); // Timeout coder->timeout = options->timeout; // Free the old filter chain and the cache. lzma_filters_free(coder->filters, allocator); lzma_filters_free(coder->filters_cache, allocator); // Copy the new filter chain. 
return_if_error(lzma_filters_copy( filters, coder->filters, allocator)); // Index lzma_index_end(coder->index, allocator); coder->index = lzma_index_init(allocator); if (coder->index == NULL) return LZMA_MEM_ERROR; // Stream Header coder->stream_flags.version = 0; coder->stream_flags.check = options->check; return_if_error(lzma_stream_header_encode( &coder->stream_flags, coder->header)); coder->header_pos = 0; // Progress info coder->progress_in = 0; coder->progress_out = LZMA_STREAM_HEADER_SIZE; return LZMA_OK; } #ifdef HAVE_SYMBOL_VERSIONS_LINUX // These are for compatibility with binaries linked against liblzma that // has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7. // Actually that patch didn't create lzma_stream_encoder_mt@XZ_5.2.2 // but it has been added here anyway since someone might misread the // RHEL patch and think both @XZ_5.1.2alpha and @XZ_5.2.2 exist. LZMA_SYMVER_API("lzma_stream_encoder_mt@XZ_5.1.2alpha", lzma_ret, lzma_stream_encoder_mt_512a)( lzma_stream *strm, const lzma_mt *options) lzma_nothrow lzma_attr_warn_unused_result __attribute__((__alias__("lzma_stream_encoder_mt_52"))); LZMA_SYMVER_API("lzma_stream_encoder_mt@XZ_5.2.2", lzma_ret, lzma_stream_encoder_mt_522)( lzma_stream *strm, const lzma_mt *options) lzma_nothrow lzma_attr_warn_unused_result __attribute__((__alias__("lzma_stream_encoder_mt_52"))); LZMA_SYMVER_API("lzma_stream_encoder_mt@@XZ_5.2", lzma_ret, lzma_stream_encoder_mt_52)( lzma_stream *strm, const lzma_mt *options) lzma_nothrow lzma_attr_warn_unused_result; #define lzma_stream_encoder_mt lzma_stream_encoder_mt_52 #endif extern LZMA_API(lzma_ret) lzma_stream_encoder_mt(lzma_stream *strm, const lzma_mt *options) { lzma_next_strm_init(stream_encoder_mt_init, strm, options); strm->internal->supported_actions[LZMA_RUN] = true; // strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; strm->internal->supported_actions[LZMA_FULL_BARRIER] = true; 
strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } #ifdef HAVE_SYMBOL_VERSIONS_LINUX LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@XZ_5.1.2alpha", uint64_t, lzma_stream_encoder_mt_memusage_512a)( const lzma_mt *options) lzma_nothrow lzma_attr_pure __attribute__((__alias__("lzma_stream_encoder_mt_memusage_52"))); LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@XZ_5.2.2", uint64_t, lzma_stream_encoder_mt_memusage_522)( const lzma_mt *options) lzma_nothrow lzma_attr_pure __attribute__((__alias__("lzma_stream_encoder_mt_memusage_52"))); LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@@XZ_5.2", uint64_t, lzma_stream_encoder_mt_memusage_52)( const lzma_mt *options) lzma_nothrow lzma_attr_pure; #define lzma_stream_encoder_mt_memusage lzma_stream_encoder_mt_memusage_52 #endif // This function name is a monster but it's consistent with the older // monster names. :-( 31 chars is the max that C99 requires so in that // sense it's not too long. ;-) extern LZMA_API(uint64_t) lzma_stream_encoder_mt_memusage(const lzma_mt *options) { lzma_options_easy easy; const lzma_filter *filters; uint64_t block_size; uint64_t outbuf_size_max; if (get_options(options, &easy, &filters, &block_size, &outbuf_size_max) != LZMA_OK) return UINT64_MAX; // Memory usage of the input buffers const uint64_t inbuf_memusage = options->threads * block_size; // Memory usage of the filter encoders uint64_t filters_memusage = lzma_raw_encoder_memusage(filters); if (filters_memusage == UINT64_MAX) return UINT64_MAX; filters_memusage *= options->threads; // Memory usage of the output queue const uint64_t outq_memusage = lzma_outq_memusage( outbuf_size_max, options->threads); if (outq_memusage == UINT64_MAX) return UINT64_MAX; // Sum them with overflow checking. 
uint64_t total_memusage = LZMA_MEMUSAGE_BASE + sizeof(lzma_stream_coder) + options->threads * sizeof(worker_thread); if (UINT64_MAX - total_memusage < inbuf_memusage) return UINT64_MAX; total_memusage += inbuf_memusage; if (UINT64_MAX - total_memusage < filters_memusage) return UINT64_MAX; total_memusage += filters_memusage; if (UINT64_MAX - total_memusage < outq_memusage) return UINT64_MAX; return total_memusage + outq_memusage; } diff --git a/contrib/xz/src/liblzma/common/string_conversion.c b/contrib/xz/src/liblzma/common/string_conversion.c index 0d09053f49f8..d2c1e80936b2 100644 --- a/contrib/xz/src/liblzma/common/string_conversion.c +++ b/contrib/xz/src/liblzma/common/string_conversion.c @@ -1,1324 +1,1324 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file string_conversion.c /// \brief Conversion of strings to filter chain and vice versa // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "filter_common.h" ///////////////////// // String building // ///////////////////// /// How much memory to allocate for strings. For now, no realloc is used /// so this needs to be big enough even though there of course is /// an overflow check still. /// /// FIXME? Using a fixed size is wasteful if the application doesn't free /// the string fairly quickly but this can be improved later if needed. 
#define STR_ALLOC_SIZE 800 typedef struct { char *buf; size_t pos; } lzma_str; static lzma_ret str_init(lzma_str *str, const lzma_allocator *allocator) { str->buf = lzma_alloc(STR_ALLOC_SIZE, allocator); if (str->buf == NULL) return LZMA_MEM_ERROR; str->pos = 0; return LZMA_OK; } static void str_free(lzma_str *str, const lzma_allocator *allocator) { lzma_free(str->buf, allocator); return; } static bool str_is_full(const lzma_str *str) { return str->pos == STR_ALLOC_SIZE - 1; } static lzma_ret str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator) { if (str_is_full(str)) { // The preallocated buffer was too small. // This shouldn't happen as STR_ALLOC_SIZE should // be adjusted if new filters are added. lzma_free(str->buf, allocator); *dest = NULL; assert(0); return LZMA_PROG_ERROR; } str->buf[str->pos] = '\0'; *dest = str->buf; return LZMA_OK; } static void str_append_str(lzma_str *str, const char *s) { const size_t len = strlen(s); const size_t limit = STR_ALLOC_SIZE - 1 - str->pos; const size_t copy_size = my_min(len, limit); memcpy(str->buf + str->pos, s, copy_size); str->pos += copy_size; return; } static void str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix) { if (v == 0) { str_append_str(str, "0"); } else { // NOTE: Don't use plain "B" because xz and the parser in this // file don't support it and at glance it may look like 8 // (there cannot be a space before the suffix). static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" }; size_t suf = 0; if (use_byte_suffix) { while ((v & 1023) == 0 && suf < ARRAY_SIZE(suffixes) - 1) { v >>= 10; ++suf; } } // UINT32_MAX in base 10 would need 10 + 1 bytes. Remember // that initializing to "" initializes all elements to // zero so '\0'-termination gets handled by this. 
char buf[16] = ""; size_t pos = sizeof(buf) - 1; do { buf[--pos] = '0' + (v % 10); v /= 10; } while (v != 0); str_append_str(str, buf + pos); str_append_str(str, suffixes[suf]); } return; } ////////////////////////////////////////////// // Parsing and stringification declarations // ////////////////////////////////////////////// /// Maximum length for filter and option names. /// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes #define NAME_LEN_MAX 11 /// For option_map.flags: Use .u.map to do convert the input value /// to an integer. Without this flag, .u.range.{min,max} are used /// as the allowed range for the integer. #define OPTMAP_USE_NAME_VALUE_MAP 0x01 /// For option_map.flags: Allow KiB/MiB/GiB in input string and use them in /// the stringified output if the value is an exact multiple of these. /// This is used e.g. for LZMA1/2 dictionary size. #define OPTMAP_USE_BYTE_SUFFIX 0x02 /// For option_map.flags: If the integer value is zero then this option /// won't be included in the stringified output. It's used e.g. for /// BCJ filter start offset which usually is zero. #define OPTMAP_NO_STRFY_ZERO 0x04 /// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0, /// it doesn't need to be specified in the initializers as it is /// the implicit value. enum { OPTMAP_TYPE_UINT32, OPTMAP_TYPE_LZMA_MODE, OPTMAP_TYPE_LZMA_MATCH_FINDER, OPTMAP_TYPE_LZMA_PRESET, }; /// This is for mapping string values in options to integers. /// The last element of an array must have "" as the name. /// It's used e.g. for match finder names in LZMA1/2. typedef struct { const char name[NAME_LEN_MAX + 1]; const uint32_t value; } name_value_map; /// Each filter that has options needs an array of option_map structures. /// The array doesn't need to be terminated as the functions take the /// length of the array as an argument. 
/// /// When converting a string to filter options structure, option values /// will be handled in a few different ways: /// /// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string /// is handled specially. /// /// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is /// converted to an integer using the name_value_map pointed by .u.map. /// The last element in .u.map must have .name = "" as the terminator. /// /// (3) Otherwise the string is treated as a non-negative unsigned decimal /// integer which must be in the range set in .u.range. If .flags has /// OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed. /// /// The integer value from (2) or (3) is then stored to filter_options /// at the offset specified in .offset using the type specified in .type /// (default is uint32_t). /// /// Stringifying a filter is done by processing a given number of options -/// in oder from the beginning of an option_map array. The integer is +/// in order from the beginning of an option_map array. The integer is /// read from filter_options at .offset using the type from .type. /// /// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the /// option is skipped. /// /// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used /// to convert the option to a string. If the map doesn't contain a string /// for the integer value then "UNKNOWN" is used. /// /// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is /// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB, /// MiB, or GiB suffix is used if the value is an exact multiple of these. /// Plain "B" suffix is never used. 
typedef struct { char name[NAME_LEN_MAX + 1]; uint8_t type; uint8_t flags; uint16_t offset; union { struct { uint32_t min; uint32_t max; } range; const name_value_map *map; } u; } option_map; static const char *parse_options(const char **const str, const char *str_end, void *filter_options, const option_map *const optmap, const size_t optmap_size); ///////// // BCJ // ///////// #if defined(HAVE_ENCODER_X86) \ || defined(HAVE_DECODER_X86) \ || defined(HAVE_ENCODER_ARM) \ || defined(HAVE_DECODER_ARM) \ || defined(HAVE_ENCODER_ARMTHUMB) \ || defined(HAVE_DECODER_ARMTHUMB) \ || defined(HAVE_ENCODER_ARM64) \ || defined(HAVE_DECODER_ARM64) \ || defined(HAVE_ENCODER_POWERPC) \ || defined(HAVE_DECODER_POWERPC) \ || defined(HAVE_ENCODER_IA64) \ || defined(HAVE_DECODER_IA64) \ || defined(HAVE_ENCODER_SPARC) \ || defined(HAVE_DECODER_SPARC) static const option_map bcj_optmap[] = { { .name = "start", .flags = OPTMAP_NO_STRFY_ZERO | OPTMAP_USE_BYTE_SUFFIX, .offset = offsetof(lzma_options_bcj, start_offset), .u.range.min = 0, .u.range.max = UINT32_MAX, } }; static const char * parse_bcj(const char **const str, const char *str_end, void *filter_options) { // filter_options was zeroed on allocation and that is enough // for the default value. 
return parse_options(str, str_end, filter_options, bcj_optmap, ARRAY_SIZE(bcj_optmap)); } #endif /////////// // Delta // /////////// #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) static const option_map delta_optmap[] = { { .name = "dist", .offset = offsetof(lzma_options_delta, dist), .u.range.min = LZMA_DELTA_DIST_MIN, .u.range.max = LZMA_DELTA_DIST_MAX, } }; static const char * parse_delta(const char **const str, const char *str_end, void *filter_options) { lzma_options_delta *opts = filter_options; opts->type = LZMA_DELTA_TYPE_BYTE; opts->dist = LZMA_DELTA_DIST_MIN; return parse_options(str, str_end, filter_options, delta_optmap, ARRAY_SIZE(delta_optmap)); } #endif /////////////////// // LZMA1 & LZMA2 // /////////////////// /// Help string for presets #define LZMA12_PRESET_STR "0-9[e]" static const char * parse_lzma12_preset(const char **const str, const char *str_end, uint32_t *preset) { assert(*str < str_end); *preset = (uint32_t)(**str - '0'); // NOTE: Remember to update LZMA12_PRESET_STR if this is modified! 
while (++*str < str_end) { switch (**str) { case 'e': *preset |= LZMA_PRESET_EXTREME; break; default: return "Unsupported preset flag"; } } return NULL; } static const char * set_lzma12_preset(const char **const str, const char *str_end, void *filter_options) { uint32_t preset; const char *errmsg = parse_lzma12_preset(str, str_end, &preset); if (errmsg != NULL) return errmsg; lzma_options_lzma *opts = filter_options; if (lzma_lzma_preset(opts, preset)) return "Unsupported preset"; return NULL; } static const name_value_map lzma12_mode_map[] = { { "fast", LZMA_MODE_FAST }, { "normal", LZMA_MODE_NORMAL }, { "", 0 } }; static const name_value_map lzma12_mf_map[] = { { "hc3", LZMA_MF_HC3 }, { "hc4", LZMA_MF_HC4 }, { "bt2", LZMA_MF_BT2 }, { "bt3", LZMA_MF_BT3 }, { "bt4", LZMA_MF_BT4 }, { "", 0 } }; static const option_map lzma12_optmap[] = { { .name = "preset", .type = OPTMAP_TYPE_LZMA_PRESET, }, { .name = "dict", .flags = OPTMAP_USE_BYTE_SUFFIX, .offset = offsetof(lzma_options_lzma, dict_size), .u.range.min = LZMA_DICT_SIZE_MIN, // FIXME? The max is really max for encoding but decoding // would allow 4 GiB - 1 B. 
.u.range.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29), }, { .name = "lc", .offset = offsetof(lzma_options_lzma, lc), .u.range.min = LZMA_LCLP_MIN, .u.range.max = LZMA_LCLP_MAX, }, { .name = "lp", .offset = offsetof(lzma_options_lzma, lp), .u.range.min = LZMA_LCLP_MIN, .u.range.max = LZMA_LCLP_MAX, }, { .name = "pb", .offset = offsetof(lzma_options_lzma, pb), .u.range.min = LZMA_PB_MIN, .u.range.max = LZMA_PB_MAX, }, { .name = "mode", .type = OPTMAP_TYPE_LZMA_MODE, .flags = OPTMAP_USE_NAME_VALUE_MAP, .offset = offsetof(lzma_options_lzma, mode), .u.map = lzma12_mode_map, }, { .name = "nice", .offset = offsetof(lzma_options_lzma, nice_len), .u.range.min = 2, .u.range.max = 273, }, { .name = "mf", .type = OPTMAP_TYPE_LZMA_MATCH_FINDER, .flags = OPTMAP_USE_NAME_VALUE_MAP, .offset = offsetof(lzma_options_lzma, mf), .u.map = lzma12_mf_map, }, { .name = "depth", .offset = offsetof(lzma_options_lzma, depth), .u.range.min = 0, .u.range.max = UINT32_MAX, } }; static const char * parse_lzma12(const char **const str, const char *str_end, void *filter_options) { lzma_options_lzma *opts = filter_options; // It cannot fail. const bool preset_ret = lzma_lzma_preset(opts, LZMA_PRESET_DEFAULT); assert(!preset_ret); (void)preset_ret; const char *errmsg = parse_options(str, str_end, filter_options, lzma12_optmap, ARRAY_SIZE(lzma12_optmap)); if (errmsg != NULL) return errmsg; if (opts->lc + opts->lp > LZMA_LCLP_MAX) return "The sum of lc and lp must not exceed 4"; return NULL; } ///////////////////////////////////////// // Generic parsing and stringification // ///////////////////////////////////////// static const struct { /// Name of the filter char name[NAME_LEN_MAX + 1]; /// For lzma_str_to_filters: /// Size of the filter-specific options structure. uint32_t opts_size; /// Filter ID lzma_vli id; /// For lzma_str_to_filters: /// Function to parse the filter-specific options. The filter_options /// will already have been allocated using lzma_alloc_zero(). 
const char *(*parse)(const char **str, const char *str_end,
			void *filter_options);

	/// For lzma_str_from_filters:
	/// If the flag LZMA_STR_ENCODER is used then the first
	/// strfy_encoder elements of optmap are stringified.
	/// With LZMA_STR_DECODER strfy_decoder is used.
-	/// Currently encoders use all flags that decoders do but if
+	/// Currently encoders use all options that decoders do but if
	/// that changes then this needs to be changed too, for example,
-	/// add a new OPTMAP flag to skip printing some decoder-only flags.
+	/// add a new OPTMAP flag to skip printing some decoder-only options.
	const option_map *optmap;
	uint8_t strfy_encoder;
	uint8_t strfy_decoder;

	/// For lzma_str_from_filters:
	/// If true, lzma_filter.options is allowed to be NULL. In that case,
	/// only the filter name is printed without any options.
	bool allow_null;

} filter_name_map[] = {
	// Field order in each entry: name, opts_size, id, parse, optmap,
	// strfy_encoder, strfy_decoder, allow_null.
	//
	// For lzma1 and lzma2 the encoder count (9) covers every entry of
	// lzma12_optmap. The decoder counts cover only the first entries
	// of that array: 5 for lzma1 (preset, dict, lc, lp, pb) and
	// 2 for lzma2 (preset, dict).
#if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1)
	{ "lzma1", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA1,
	  &parse_lzma12, lzma12_optmap, 9, 5, false },
#endif

#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
	{ "lzma2", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA2,
	  &parse_lzma12, lzma12_optmap, 9, 2, false },
#endif

#if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86)
	{ "x86", sizeof(lzma_options_bcj), LZMA_FILTER_X86,
	  &parse_bcj, bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM)
	{ "arm", sizeof(lzma_options_bcj), LZMA_FILTER_ARM,
	  &parse_bcj, bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB)
	{ "armthumb", sizeof(lzma_options_bcj), LZMA_FILTER_ARMTHUMB,
	  &parse_bcj, bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64)
	{ "arm64", sizeof(lzma_options_bcj), LZMA_FILTER_ARM64,
	  &parse_bcj, bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC)
	{ "powerpc", sizeof(lzma_options_bcj), LZMA_FILTER_POWERPC,
	  &parse_bcj, bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64)
	{ "ia64", sizeof(lzma_options_bcj), LZMA_FILTER_IA64,
	  &parse_bcj, bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
	{ "sparc", sizeof(lzma_options_bcj), LZMA_FILTER_SPARC,
	  &parse_bcj, bcj_optmap, 1, 1, true },
#endif

#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
	{ "delta", sizeof(lzma_options_delta), LZMA_FILTER_DELTA,
	  &parse_delta, delta_optmap, 1, 1, false },
#endif
};


/// Decodes options from a string for one filter (name1=value1,name2=value2).
/// Caller must have allocated memory for filter_options already and set
/// the initial default values. This is called from the filter-specific
/// parse_* functions.
///
/// The input string starts at *str and the address in str_end is the first
/// char that is not part of the string anymore. So no '\0' terminator is
-/// used. *str is advanced everytime something has been decoded successfully.
+/// used. *str is advanced every time something has been decoded successfully.
///
/// Returns NULL on success. On error, a pointer to a constant error message
/// string is returned.
static const char *
parse_options(const char **const str, const char *str_end,
		void *filter_options,
		const option_map *const optmap, const size_t optmap_size)
{
	while (*str < str_end && **str != '\0') {
		// Each option is of the form name=value.
		// Commas (',') separate options. Extra commas are ignored.
		// Ignoring extra commas makes it simpler if an optional
		// option is stored in a shell variable which can be empty.
		if (**str == ',') {
			++*str;
			continue;
		}

		// Find where the next name=value ends.
		const size_t str_len = (size_t)(str_end - *str);
		const char *name_eq_value_end = memchr(*str, ',', str_len);
		if (name_eq_value_end == NULL)
			name_eq_value_end = str_end;

		// The '=' is searched only within this name=value pair.
		const char *equals_sign = memchr(*str, '=',
				(size_t)(name_eq_value_end - *str));

		// Fail if the '=' wasn't found or the option name is missing
		// (the first char is '=').
		if (equals_sign == NULL || **str == '=')
			return "Options must be 'name=value' pairs separated "
					"with commas";

		// Reject a too long option name so that the memcmp()
		// in the loop below won't read past the end of the
		// string in optmap[i].name.
		const size_t name_len = (size_t)(equals_sign - *str);
		if (name_len > NAME_LEN_MAX)
			return "Unknown option name";

		// Find the option name from optmap[]. The name has to match
		// exactly: the first name_len bytes must be equal and
		// the name in optmap[] must end right after them.
		size_t i = 0;
		while (true) {
			if (i == optmap_size)
				return "Unknown option name";

			if (memcmp(*str, optmap[i].name, name_len) == 0
					&& optmap[i].name[name_len] == '\0')
				break;

			++i;
		}

		// The input string is good at least until the start of
		// the option value.
		*str = equals_sign + 1;

		// The code assumes that the option value isn't an empty
		// string so check it here.
		const size_t value_len = (size_t)(name_eq_value_end - *str);
		if (value_len == 0)
			return "Option value cannot be empty";

		// LZMA1/2 preset has its own parsing function.
		if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) {
			const char *errmsg = set_lzma12_preset(str,
					name_eq_value_end, filter_options);
			if (errmsg != NULL)
				return errmsg;

			// *str isn't set to name_eq_value_end here like it
			// is for the other option types at the end of
			// the loop body.
			continue;
		}

		// It's an integer value.
		uint32_t v;
		if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
			// The integer is picked from a string-to-integer map.
			//
			// Reject a too long value string so that the memcmp()
			// in the loop below won't read past the end of the
			// string in optmap[i].u.map[j].name.
			if (value_len > NAME_LEN_MAX)
				return "Invalid option value";

			const name_value_map *map = optmap[i].u.map;
			size_t j = 0;
			while (true) {
				// The array is terminated with an empty name.
				if (map[j].name[0] == '\0')
					return "Invalid option value";

				if (memcmp(*str, map[j].name, value_len) == 0
						&& map[j].name[value_len]
							== '\0') {
					v = map[j].value;
					break;
				}

				++j;
			}
		} else if (**str < '0' || **str > '9') {
			// Note that "max" isn't supported while it is
			// supported in xz. It's not useful here.
			return "Value is not a non-negative decimal integer";
		} else {
			// strtoul() has locale-specific behavior so it cannot
			// be relied on to get reproducible results since we
			// cannot change the locale in a thread-safe library.
			// It also needs '\0'-termination.
			//
			// Use a temporary pointer so that *str will point
			// to the beginning of the value string in case
			// an error occurs.
			const char *p = *str;
			v = 0;
			do {
				// Both the multiplication and the addition
				// are checked so that v cannot wrap around.
				if (v > UINT32_MAX / 10)
					return "Value out of range";

				v *= 10;

				const uint32_t add = (uint32_t)(*p - '0');
				if (UINT32_MAX - add < v)
					return "Value out of range";

				v += add;
				++p;
			} while (p < name_eq_value_end
					&& *p >= '0' && *p <= '9');

			if (p < name_eq_value_end) {
				// Remember this position so that it can be
				// used for error messages that are
				// specifically about the suffix. (Out of
				// range values are about the whole value
				// and those error messages point to the
				// beginning of the number part,
				// not to the suffix.)
				const char *multiplier_start = p;

				// If multiplier suffix shouldn't be used
				// then don't allow them even if the value
				// would stay within limits. This is a somewhat
				// unnecessary check but it rejects silly
				// things like lzma2:pb=0MiB which xz allows.
				if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX)
						== 0) {
					*str = multiplier_start;
					return "This option does not support "
						"any integer suffixes";
				}

				uint32_t shift;

				switch (*p) {
				case 'k':
				case 'K':
					shift = 10;
					break;

				case 'm':
				case 'M':
					shift = 20;
					break;

				case 'g':
				case 'G':
					shift = 30;
					break;

				default:
					*str = multiplier_start;
					return "Invalid multiplier suffix "
							"(KiB, MiB, or GiB)";
				}

				++p;

				// Allow "M", "Mi", "MB", "MiB" and the same
				// for the other five characters from the
				// switch-statement above. All are handled
				// as base-2 (perhaps a mistake, perhaps not).
				// Note that 'i' and 'B' are case sensitive.
				if (p < name_eq_value_end && *p == 'i')
					++p;

				if (p < name_eq_value_end && *p == 'B')
					++p;

				// Now we must have no chars remaining.
				if (p < name_eq_value_end) {
					*str = multiplier_start;
					return "Invalid multiplier suffix "
							"(KiB, MiB, or GiB)";
				}

				// Make sure that no set bits are lost
				// in the shift.
				if (v > (UINT32_MAX >> shift))
					return "Value out of range";

				v <<= shift;
			}

			if (v < optmap[i].u.range.min
					|| v > optmap[i].u.range.max)
				return "Value out of range";
		}

		// Set the value in filter_options. Enums are handled
		// specially since the underlying type isn't the same
		// as uint32_t on all systems.
		void *ptr = (char *)filter_options + optmap[i].offset;
		switch (optmap[i].type) {
		case OPTMAP_TYPE_LZMA_MODE:
			*(lzma_mode *)ptr = (lzma_mode)v;
			break;

		case OPTMAP_TYPE_LZMA_MATCH_FINDER:
			*(lzma_match_finder *)ptr = (lzma_match_finder)v;
			break;

		default:
			*(uint32_t *)ptr = v;
			break;
		}

		// This option has been successfully handled.
		*str = name_eq_value_end;
	}

	// No errors.
	return NULL;
}


/// Finds the name of the filter at the beginning of the string and
/// calls filter_name_map[i].parse() to decode the filter-specific options.
/// The caller must have set str_end so that exactly one filter and its
/// options are present without any trailing characters.
static const char *
parse_filter(const char **const str, const char *str_end, lzma_filter *filter,
		const lzma_allocator *allocator, bool only_xz)
{
	// Search for a colon or equals sign that would separate the filter
	// name from filter options. If neither is found, then the input
	// string only contains a filter name and there are no options.
	//
	// First assume that a colon or equals sign won't be found:
	const char *name_end = str_end;
	const char *opts_start = str_end;

	for (const char *p = *str; p < str_end; ++p) {
		if (*p == ':' || *p == '=') {
			name_end = p;

			// Filter options (name1=value1,name2=value2,...)
			// begin after the colon or equals sign.
			opts_start = p + 1;
			break;
		}
	}

	// Reject a too long filter name so that the memcmp()
	// in the loop below won't read past the end of the
	// string in filter_name_map[i].name.
const size_t name_len = (size_t)(name_end - *str); if (name_len > NAME_LEN_MAX) return "Unknown filter name"; for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) { if (memcmp(*str, filter_name_map[i].name, name_len) == 0 && filter_name_map[i].name[name_len] == '\0') { if (only_xz && filter_name_map[i].id >= LZMA_FILTER_RESERVED_START) return "This filter cannot be used in " "the .xz format"; // Allocate the filter-specific options and // initialize the memory with zeros. void *options = lzma_alloc_zero( filter_name_map[i].opts_size, allocator); if (options == NULL) return "Memory allocation failed"; // Filter name was found so the input string is good // at least this far. *str = opts_start; const char *errmsg = filter_name_map[i].parse( str, str_end, options); if (errmsg != NULL) { lzma_free(options, allocator); return errmsg; } // *filter is modified only when parsing is successful. filter->id = filter_name_map[i].id; filter->options = options; return NULL; } } return "Unknown filter name"; } /// Converts the string to a filter chain (array of lzma_filter structures). /// -/// *str is advanced everytime something has been decoded successfully. +/// *str is advanced every time something has been decoded successfully. /// This way the caller knows where in the string a possible error occurred. static const char * str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags, const lzma_allocator *allocator) { const char *errmsg; // Skip leading spaces. while (**str == ' ') ++*str; if (**str == '\0') return "Empty string is not allowed, " "try \"6\" if a default value is needed"; // Detect the type of the string. // // A string beginning with a digit or a string beginning with // one dash and a digit are treated as presets. Trailing spaces // will be ignored too (leading spaces were already ignored above). // // For example, "6", "7 ", "-9e", or " -3 " are treated as presets. // Strings like "-" or "- " aren't preset. 
#define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9') if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) { if (**str == '-') ++*str; // Ignore trailing spaces. const size_t str_len = strlen(*str); const char *str_end = memchr(*str, ' ', str_len); if (str_end != NULL) { // There is at least one trailing space. Check that // there are no chars other than spaces. for (size_t i = 1; str_end[i] != '\0'; ++i) if (str_end[i] != ' ') return "Unsupported preset"; } else { // There are no trailing spaces. Use the whole string. str_end = *str + str_len; } uint32_t preset; errmsg = parse_lzma12_preset(str, str_end, &preset); if (errmsg != NULL) return errmsg; lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator); if (opts == NULL) return "Memory allocation failed"; if (lzma_lzma_preset(opts, preset)) { lzma_free(opts, allocator); return "Unsupported preset"; } filters[0].id = LZMA_FILTER_LZMA2; filters[0].options = opts; filters[1].id = LZMA_VLI_UNKNOWN; filters[1].options = NULL; return NULL; } // Not a preset so it must be a filter chain. // // If LZMA_STR_ALL_FILTERS isn't used we allow only filters that // can be used in .xz. const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0; // Use a temporary array so that we don't modify the caller-supplied // one until we know that no errors occurred. lzma_filter temp_filters[LZMA_FILTERS_MAX + 1]; size_t i = 0; do { if (i == LZMA_FILTERS_MAX) { errmsg = "The maximum number of filters is four"; goto error; } // Skip "--" if present. if ((*str)[0] == '-' && (*str)[1] == '-') *str += 2; // Locate the end of "filter:name1=value1,name2=value2", // stopping at the first "--" or a single space. const char *filter_end = *str; while (filter_end[0] != '\0') { if ((filter_end[0] == '-' && filter_end[1] == '-') || filter_end[0] == ' ') break; ++filter_end; } // Inputs that have "--" at the end or "-- " in the middle // will result in an empty filter name. 
if (filter_end == *str) { errmsg = "Filter name is missing"; goto error; } errmsg = parse_filter(str, filter_end, &temp_filters[i], allocator, only_xz); if (errmsg != NULL) goto error; // Skip trailing spaces. while (**str == ' ') ++*str; ++i; } while (**str != '\0'); // Seems to be good, terminate the array so that // basic validation can be done. temp_filters[i].id = LZMA_VLI_UNKNOWN; temp_filters[i].options = NULL; // Do basic validation if the application didn't prohibit it. if ((flags & LZMA_STR_NO_VALIDATION) == 0) { size_t dummy; const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy); assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR); if (ret != LZMA_OK) { errmsg = "Invalid filter chain " "('lzma2' missing at the end?)"; goto error; } } // All good. Copy the filters to the application supplied array. memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter)); return NULL; error: // Free the filter options that were successfully decoded. while (i-- > 0) lzma_free(temp_filters[i].options, allocator); return errmsg; } extern LZMA_API(const char *) lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters, uint32_t flags, const lzma_allocator *allocator) { if (str == NULL || filters == NULL) return "Unexpected NULL pointer argument(s) " "to lzma_str_to_filters()"; // Validate the flags. const uint32_t supported_flags = LZMA_STR_ALL_FILTERS | LZMA_STR_NO_VALIDATION; if (flags & ~supported_flags) return "Unsupported flags to lzma_str_to_filters()"; const char *used = str; const char *errmsg = str_to_filters(&used, filters, flags, allocator); if (error_pos != NULL) { const size_t n = (size_t)(used - str); *error_pos = n > INT_MAX ? INT_MAX : (int)n; } return errmsg; } /// Converts options of one filter to a string. /// /// The caller must have already put the filter name in the destination /// string. Since it is possible that no options will be needed, the caller /// won't have put a delimiter character (':' or '=') in the string yet. 
/// We will add it if at least one option will be added to the string. static void strfy_filter(lzma_str *dest, const char *delimiter, const option_map *optmap, size_t optmap_count, const void *filter_options) { for (size_t i = 0; i < optmap_count; ++i) { // No attempt is made to reverse LZMA1/2 preset. if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) continue; // All options have integer values, some just are mapped // to a string with a name_value_map. LZMA1/2 preset // isn't reversed back to preset=PRESET form. uint32_t v; const void *ptr = (const char *)filter_options + optmap[i].offset; switch (optmap[i].type) { case OPTMAP_TYPE_LZMA_MODE: v = *(const lzma_mode *)ptr; break; case OPTMAP_TYPE_LZMA_MATCH_FINDER: v = *(const lzma_match_finder *)ptr; break; default: v = *(const uint32_t *)ptr; break; } // Skip this if this option should be omitted from // the string when the value is zero. if (v == 0 && (optmap[i].flags & OPTMAP_NO_STRFY_ZERO)) continue; // Before the first option we add whatever delimiter // the caller gave us. For later options a comma is used. str_append_str(dest, delimiter); delimiter = ","; // Add the option name and equals sign. str_append_str(dest, optmap[i].name); str_append_str(dest, "="); if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) { const name_value_map *map = optmap[i].u.map; size_t j = 0; while (true) { if (map[j].name[0] == '\0') { str_append_str(dest, "UNKNOWN"); break; } if (map[j].value == v) { str_append_str(dest, map[j].name); break; } ++j; } } else { str_append_u32(dest, v, optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX); } } return; } extern LZMA_API(lzma_ret) lzma_str_from_filters(char **output_str, const lzma_filter *filters, uint32_t flags, const lzma_allocator *allocator) { // On error *output_str is always set to NULL. // Do it as the very first step. if (output_str == NULL) return LZMA_PROG_ERROR; *output_str = NULL; if (filters == NULL) return LZMA_PROG_ERROR; // Validate the flags. 
const uint32_t supported_flags = LZMA_STR_ENCODER | LZMA_STR_DECODER | LZMA_STR_GETOPT_LONG | LZMA_STR_NO_SPACES; if (flags & ~supported_flags) return LZMA_OPTIONS_ERROR; // There must be at least one filter. if (filters[0].id == LZMA_VLI_UNKNOWN) return LZMA_OPTIONS_ERROR; // Allocate memory for the output string. lzma_str dest; return_if_error(str_init(&dest, allocator)); const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)); const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":"; for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { // If we reach LZMA_FILTERS_MAX, then the filters array // is too large since the ID cannot be LZMA_VLI_UNKNOWN here. if (i == LZMA_FILTERS_MAX) { str_free(&dest, allocator); return LZMA_OPTIONS_ERROR; } // Don't add a space between filters if the caller // doesn't want them. if (i > 0 && !(flags & LZMA_STR_NO_SPACES)) str_append_str(&dest, " "); // Use dashes for xz getopt_long() compatible syntax but also // use dashes to separate filters when spaces weren't wanted. if ((flags & LZMA_STR_GETOPT_LONG) || (i > 0 && (flags & LZMA_STR_NO_SPACES))) str_append_str(&dest, "--"); size_t j = 0; while (true) { if (j == ARRAY_SIZE(filter_name_map)) { // Filter ID in filters[i].id isn't supported. str_free(&dest, allocator); return LZMA_OPTIONS_ERROR; } if (filter_name_map[j].id == filters[i].id) { // Add the filter name. str_append_str(&dest, filter_name_map[j].name); // If only the filter names were wanted then // skip to the next filter. In this case // .options is ignored and may be NULL even // when the filter doesn't allow NULL options. if (!show_opts) break; if (filters[i].options == NULL) { if (!filter_name_map[j].allow_null) { // Filter-specific options // are missing but with // this filter the options // structure is mandatory. str_free(&dest, allocator); return LZMA_OPTIONS_ERROR; } // .options is allowed to be NULL. // There is no need to add any // options to the string. 
break; } // Options structure is available. Add // the filter options to the string. const size_t optmap_count = (flags & LZMA_STR_ENCODER) ? filter_name_map[j].strfy_encoder : filter_name_map[j].strfy_decoder; strfy_filter(&dest, opt_delim, filter_name_map[j].optmap, optmap_count, filters[i].options); break; } ++j; } } return str_finish(output_str, &dest, allocator); } extern LZMA_API(lzma_ret) lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags, const lzma_allocator *allocator) { // On error *output_str is always set to NULL. // Do it as the very first step. if (output_str == NULL) return LZMA_PROG_ERROR; *output_str = NULL; // Validate the flags. const uint32_t supported_flags = LZMA_STR_ALL_FILTERS | LZMA_STR_ENCODER | LZMA_STR_DECODER | LZMA_STR_GETOPT_LONG; if (flags & ~supported_flags) return LZMA_OPTIONS_ERROR; // Allocate memory for the output string. lzma_str dest; return_if_error(str_init(&dest, allocator)); // If only listing the filter names then separate them with spaces. // Otherwise use newlines. const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)); const char *filter_delim = show_opts ? "\n" : " "; const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":"; bool first_filter_printed = false; for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) { // If we are printing only one filter then skip others. if (filter_id != LZMA_VLI_UNKNOWN && filter_id != filter_name_map[i].id) continue; // If we are printing only .xz filters then skip the others. if (filter_name_map[i].id >= LZMA_FILTER_RESERVED_START && (flags & LZMA_STR_ALL_FILTERS) == 0 && filter_id == LZMA_VLI_UNKNOWN) continue; // Add a new line if this isn't the first filter being // written to the string. 
if (first_filter_printed) str_append_str(&dest, filter_delim); first_filter_printed = true; if (flags & LZMA_STR_GETOPT_LONG) str_append_str(&dest, "--"); str_append_str(&dest, filter_name_map[i].name); // If only the filter names were wanted then continue // to the next filter. if (!show_opts) continue; const option_map *optmap = filter_name_map[i].optmap; const char *d = opt_delim; const size_t end = (flags & LZMA_STR_ENCODER) ? filter_name_map[i].strfy_encoder : filter_name_map[i].strfy_decoder; for (size_t j = 0; j < end; ++j) { // The first option is delimited from the filter // name using "=" or ":" and the rest of the options // are separated with ",". str_append_str(&dest, d); d = ","; // optname= str_append_str(&dest, optmap[j].name); str_append_str(&dest, "=<"); if (optmap[j].type == OPTMAP_TYPE_LZMA_PRESET) { // LZMA1/2 preset has its custom help string. str_append_str(&dest, LZMA12_PRESET_STR); } else if (optmap[j].flags & OPTMAP_USE_NAME_VALUE_MAP) { // Separate the possible option values by "|". const name_value_map *m = optmap[j].u.map; for (size_t k = 0; m[k].name[0] != '\0'; ++k) { if (k > 0) str_append_str(&dest, "|"); str_append_str(&dest, m[k].name); } } else { // Integer range is shown as min-max. const bool use_byte_suffix = optmap[j].flags & OPTMAP_USE_BYTE_SUFFIX; str_append_u32(&dest, optmap[j].u.range.min, use_byte_suffix); str_append_str(&dest, "-"); str_append_u32(&dest, optmap[j].u.range.max, use_byte_suffix); } str_append_str(&dest, ">"); } } // If no filters were added to the string then it must be because // the caller provided an unsupported Filter ID. 
if (!first_filter_printed) { str_free(&dest, allocator); return LZMA_OPTIONS_ERROR; } return str_finish(output_str, &dest, allocator); } diff --git a/contrib/xz/src/liblzma/lz/lz_encoder.h b/contrib/xz/src/liblzma/lz/lz_encoder.h index 7950a2f4ef1b..ffcba02ce931 100644 --- a/contrib/xz/src/liblzma/lz/lz_encoder.h +++ b/contrib/xz/src/liblzma/lz/lz_encoder.h @@ -1,341 +1,341 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file lz_encoder.h /// \brief LZ in window and match finder API /// // Authors: Igor Pavlov // Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_LZ_ENCODER_H #define LZMA_LZ_ENCODER_H #include "common.h" /// A table of these is used by the LZ-based encoder to hold /// the length-distance pairs found by the match finder. typedef struct { uint32_t len; uint32_t dist; } lzma_match; typedef struct lzma_mf_s lzma_mf; struct lzma_mf_s { /////////////// // In Window // /////////////// /// Pointer to buffer with data to be compressed uint8_t *buffer; /// Total size of the allocated buffer (that is, including all /// the extra space) uint32_t size; /// Number of bytes that must be kept available in our input history. /// That is, once keep_size_before bytes have been processed, /// buffer[read_pos - keep_size_before] is the oldest byte that /// must be available for reading. uint32_t keep_size_before; /// Number of bytes that must be kept in buffer after read_pos. /// That is, read_pos <= write_pos - keep_size_after as long as /// action is LZMA_RUN; when action != LZMA_RUN, read_pos is allowed /// to reach write_pos so that the last bytes get encoded too. uint32_t keep_size_after; /// Match finders store locations of matches using 32-bit integers. 
/// To avoid adjusting several megabytes of integers every time the /// input window is moved with move_window, we only adjust the /// offset of the buffer. Thus, buffer[value_in_hash_table - offset] /// is the byte pointed by value_in_hash_table. uint32_t offset; /// buffer[read_pos] is the next byte to run through the match /// finder. This is incremented in the match finder once the byte /// has been processed. uint32_t read_pos; /// Number of bytes that have been ran through the match finder, but /// which haven't been encoded by the LZ-based encoder yet. uint32_t read_ahead; /// As long as read_pos is less than read_limit, there is enough /// input available in buffer for at least one encoding loop. /// /// Because of the stateful API, read_limit may and will get greater /// than read_pos quite often. This is taken into account when /// calculating the value for keep_size_after. uint32_t read_limit; /// buffer[write_pos] is the first byte that doesn't contain valid /// uncompressed data; that is, the next input byte will be copied /// to buffer[write_pos]. uint32_t write_pos; /// Number of bytes not hashed before read_pos. This is needed to /// restart the match finder after LZMA_SYNC_FLUSH. uint32_t pending; ////////////////// // Match Finder // ////////////////// /// Find matches. Returns the number of distance-length pairs written /// to the matches array. This is called only via lzma_mf_find(). uint32_t (*find)(lzma_mf *mf, lzma_match *matches); /// Skips num bytes. This is like find() but doesn't make the /// distance-length pairs available, thus being a little faster. /// This is called only via mf_skip(). void (*skip)(lzma_mf *mf, uint32_t num); uint32_t *hash; uint32_t *son; uint32_t cyclic_pos; uint32_t cyclic_size; // Must be dictionary size + 1. uint32_t hash_mask; /// Maximum number of loops in the match finder uint32_t depth; /// Maximum length of a match that the match finder will try to find. 
uint32_t nice_len; /// Maximum length of a match supported by the LZ-based encoder. /// If the longest match found by the match finder is nice_len, /// mf_find() tries to expand it up to match_len_max bytes. uint32_t match_len_max; /// When running out of input, binary tree match finders need to know /// if it is due to flushing or finishing. The action is used also /// by the LZ-based encoders themselves. lzma_action action; /// Number of elements in hash[] uint32_t hash_count; /// Number of elements in son[] uint32_t sons_count; }; typedef struct { /// Extra amount of data to keep available before the "actual" /// dictionary. size_t before_size; /// Size of the history buffer size_t dict_size; /// Extra amount of data to keep available after the "actual" /// dictionary. size_t after_size; /// Maximum length of a match that the LZ-based encoder can accept. /// This is used to extend matches of length nice_len to the /// maximum possible length. size_t match_len_max; /// Match finder will search matches up to this length. /// This must be less than or equal to match_len_max. size_t nice_len; /// Type of the match finder to use lzma_match_finder match_finder; /// Maximum search depth uint32_t depth; /// TODO: Comment const uint8_t *preset_dict; uint32_t preset_dict_size; } lzma_lz_options; // The total usable buffer space at any moment outside the match finder: // before_size + dict_size + after_size + match_len_max // // In reality, there's some extra space allocated to prevent the number of // memmove() calls reasonable. The bigger the dict_size is, the bigger // this extra buffer will be since with bigger dictionaries memmove() would // also take longer. // // A single encoder loop in the LZ-based encoder may call the match finder // (mf_find() or mf_skip()) at most after_size times. In other words, // a single encoder loop may increment lzma_mf.read_pos at most after_size // times. 
Since matches are looked up to // lzma_mf.buffer[lzma_mf.read_pos + match_len_max - 1], the total // amount of extra buffer needed after dict_size becomes // after_size + match_len_max. // // before_size has two uses. The first one is to keep literals available // in cases when the LZ-based encoder has made some read ahead. // TODO: Maybe this could be changed by making the LZ-based encoders to // store the actual literals as they do with length-distance pairs. // // Algorithms such as LZMA2 first try to compress a chunk, and then check // if the encoded result is smaller than the uncompressed one. If the chunk -// was uncompressible, it is better to store it in uncompressed form in +// was incompressible, it is better to store it in uncompressed form in // the output stream. To do this, the whole uncompressed chunk has to be // still available in the history buffer. before_size achieves that. typedef struct { /// Data specific to the LZ-based encoder void *coder; /// Function to encode from *dict to out[] lzma_ret (*code)(void *coder, lzma_mf *restrict mf, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size); /// Free allocated resources void (*end)(void *coder, const lzma_allocator *allocator); /// Update the options in the middle of the encoding. lzma_ret (*options_update)(void *coder, const lzma_filter *filter); /// Set maximum allowed output size lzma_ret (*set_out_limit)(void *coder, uint64_t *uncomp_size, uint64_t out_limit); } lzma_lz_encoder; // Basic steps: // 1. Input gets copied into the dictionary. // 2. Data in dictionary gets run through the match finder byte by byte. // 3. The literals and matches are encoded using e.g. LZMA. // // The bytes that have been ran through the match finder, but not encoded yet, // are called `read ahead'. /// Get how many bytes the match finder hashes in its initial step. /// This is also the minimum nice_len value with the match finder. 
static inline uint32_t mf_get_hash_bytes(lzma_match_finder match_finder) { return (uint32_t)match_finder & 0x0F; } /// Get pointer to the first byte not ran through the match finder static inline const uint8_t * mf_ptr(const lzma_mf *mf) { return mf->buffer + mf->read_pos; } /// Get the number of bytes that haven't been ran through the match finder yet. static inline uint32_t mf_avail(const lzma_mf *mf) { return mf->write_pos - mf->read_pos; } /// Get the number of bytes that haven't been encoded yet (some of these /// bytes may have been ran through the match finder though). static inline uint32_t mf_unencoded(const lzma_mf *mf) { return mf->write_pos - mf->read_pos + mf->read_ahead; } /// Calculate the absolute offset from the beginning of the most recent /// dictionary reset. Only the lowest four bits are important, so there's no /// problem that we don't know the 64-bit size of the data encoded so far. /// /// NOTE: When moving the input window, we need to do it so that the lowest /// bits of dict->read_pos are not modified to keep this macro working /// as intended. static inline uint32_t mf_position(const lzma_mf *mf) { return mf->read_pos - mf->read_ahead; } /// Since everything else begins with mf_, use it also for lzma_mf_find(). #define mf_find lzma_mf_find /// Skip the given number of bytes. This is used when a good match was found. /// For example, if mf_find() finds a match of 200 bytes long, the first byte /// of that match was already consumed by mf_find(), and the rest 199 bytes /// have to be skipped with mf_skip(mf, 199). static inline void mf_skip(lzma_mf *mf, uint32_t amount) { if (amount != 0) { mf->skip(mf, amount); mf->read_ahead += amount; } } /// Copies at most *left number of bytes from the history buffer /// to out[]. This is needed by LZMA2 to encode uncompressed chunks. 
static inline void mf_read(lzma_mf *mf, uint8_t *out, size_t *out_pos, size_t out_size, size_t *left) { const size_t out_avail = out_size - *out_pos; const size_t copy_size = my_min(out_avail, *left); assert(mf->read_ahead == 0); assert(mf->read_pos >= *left); memcpy(out + *out_pos, mf->buffer + mf->read_pos - *left, copy_size); *out_pos += copy_size; *left -= copy_size; return; } extern lzma_ret lzma_lz_encoder_init( lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters, lzma_ret (*lz_init)(lzma_lz_encoder *lz, const lzma_allocator *allocator, lzma_vli id, const void *options, lzma_lz_options *lz_options)); extern uint64_t lzma_lz_encoder_memusage(const lzma_lz_options *lz_options); // These are only for LZ encoder's internal use. extern uint32_t lzma_mf_find( lzma_mf *mf, uint32_t *count, lzma_match *matches); extern uint32_t lzma_mf_hc3_find(lzma_mf *dict, lzma_match *matches); extern void lzma_mf_hc3_skip(lzma_mf *dict, uint32_t amount); extern uint32_t lzma_mf_hc4_find(lzma_mf *dict, lzma_match *matches); extern void lzma_mf_hc4_skip(lzma_mf *dict, uint32_t amount); extern uint32_t lzma_mf_bt2_find(lzma_mf *dict, lzma_match *matches); extern void lzma_mf_bt2_skip(lzma_mf *dict, uint32_t amount); extern uint32_t lzma_mf_bt3_find(lzma_mf *dict, lzma_match *matches); extern void lzma_mf_bt3_skip(lzma_mf *dict, uint32_t amount); extern uint32_t lzma_mf_bt4_find(lzma_mf *dict, lzma_match *matches); extern void lzma_mf_bt4_skip(lzma_mf *dict, uint32_t amount); #endif diff --git a/contrib/xz/src/liblzma/lzma/lzma_encoder.c b/contrib/xz/src/liblzma/lzma/lzma_encoder.c index dc62f44f1bfb..559c63eda1d2 100644 --- a/contrib/xz/src/liblzma/lzma/lzma_encoder.c +++ b/contrib/xz/src/liblzma/lzma/lzma_encoder.c @@ -1,784 +1,784 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file lzma_encoder.c /// \brief LZMA encoder /// // Authors: Igor Pavlov // Lasse Collin // // This file has been put into the 
public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "lzma2_encoder.h" #include "lzma_encoder_private.h" #include "fastpos.h" ///////////// // Literal // ///////////// static inline void literal_matched(lzma_range_encoder *rc, probability *subcoder, uint32_t match_byte, uint32_t symbol) { uint32_t offset = 0x100; symbol += UINT32_C(1) << 8; do { match_byte <<= 1; const uint32_t match_bit = match_byte & offset; const uint32_t subcoder_index = offset + match_bit + (symbol >> 8); const uint32_t bit = (symbol >> 7) & 1; rc_bit(rc, &subcoder[subcoder_index], bit); symbol <<= 1; offset &= ~(match_byte ^ symbol); } while (symbol < (UINT32_C(1) << 16)); } static inline void literal(lzma_lzma1_encoder *coder, lzma_mf *mf, uint32_t position) { // Locate the literal byte to be encoded and the subcoder. const uint8_t cur_byte = mf->buffer[ mf->read_pos - mf->read_ahead]; probability *subcoder = literal_subcoder(coder->literal, coder->literal_context_bits, coder->literal_pos_mask, position, mf->buffer[mf->read_pos - mf->read_ahead - 1]); if (is_literal_state(coder->state)) { // Previous LZMA-symbol was a literal. Encode a normal // literal without a match byte. rc_bittree(&coder->rc, subcoder, 8, cur_byte); } else { // Previous LZMA-symbol was a match. Use the last byte of // the match as a "match byte". That is, compare the bits // of the current literal and the match byte. 
const uint8_t match_byte = mf->buffer[ mf->read_pos - coder->reps[0] - 1 - mf->read_ahead]; literal_matched(&coder->rc, subcoder, match_byte, cur_byte); } update_literal(coder->state); } ////////////////// // Match length // ////////////////// static void length_update_prices(lzma_length_encoder *lc, const uint32_t pos_state) { const uint32_t table_size = lc->table_size; lc->counters[pos_state] = table_size; const uint32_t a0 = rc_bit_0_price(lc->choice); const uint32_t a1 = rc_bit_1_price(lc->choice); const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2); const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2); uint32_t *const prices = lc->prices[pos_state]; uint32_t i; for (i = 0; i < table_size && i < LEN_LOW_SYMBOLS; ++i) prices[i] = a0 + rc_bittree_price(lc->low[pos_state], LEN_LOW_BITS, i); for (; i < table_size && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i) prices[i] = b0 + rc_bittree_price(lc->mid[pos_state], LEN_MID_BITS, i - LEN_LOW_SYMBOLS); for (; i < table_size; ++i) prices[i] = b1 + rc_bittree_price(lc->high, LEN_HIGH_BITS, i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS); return; } static inline void length(lzma_range_encoder *rc, lzma_length_encoder *lc, const uint32_t pos_state, uint32_t len, const bool fast_mode) { assert(len <= MATCH_LEN_MAX); len -= MATCH_LEN_MIN; if (len < LEN_LOW_SYMBOLS) { rc_bit(rc, &lc->choice, 0); rc_bittree(rc, lc->low[pos_state], LEN_LOW_BITS, len); } else { rc_bit(rc, &lc->choice, 1); len -= LEN_LOW_SYMBOLS; if (len < LEN_MID_SYMBOLS) { rc_bit(rc, &lc->choice2, 0); rc_bittree(rc, lc->mid[pos_state], LEN_MID_BITS, len); } else { rc_bit(rc, &lc->choice2, 1); len -= LEN_MID_SYMBOLS; rc_bittree(rc, lc->high, LEN_HIGH_BITS, len); } } // Only getoptimum uses the prices so don't update the table when // in fast mode. 
if (!fast_mode) if (--lc->counters[pos_state] == 0) length_update_prices(lc, pos_state); } /////////// // Match // /////////// static inline void match(lzma_lzma1_encoder *coder, const uint32_t pos_state, const uint32_t distance, const uint32_t len) { update_match(coder->state); length(&coder->rc, &coder->match_len_encoder, pos_state, len, coder->fast_mode); const uint32_t dist_slot = get_dist_slot(distance); const uint32_t dist_state = get_dist_state(len); rc_bittree(&coder->rc, coder->dist_slot[dist_state], DIST_SLOT_BITS, dist_slot); if (dist_slot >= DIST_MODEL_START) { const uint32_t footer_bits = (dist_slot >> 1) - 1; const uint32_t base = (2 | (dist_slot & 1)) << footer_bits; const uint32_t dist_reduced = distance - base; if (dist_slot < DIST_MODEL_END) { // Careful here: base - dist_slot - 1 can be -1, but // rc_bittree_reverse starts at probs[1], not probs[0]. rc_bittree_reverse(&coder->rc, coder->dist_special + base - dist_slot - 1, footer_bits, dist_reduced); } else { rc_direct(&coder->rc, dist_reduced >> ALIGN_BITS, footer_bits - ALIGN_BITS); rc_bittree_reverse( &coder->rc, coder->dist_align, ALIGN_BITS, dist_reduced & ALIGN_MASK); ++coder->align_price_count; } } coder->reps[3] = coder->reps[2]; coder->reps[2] = coder->reps[1]; coder->reps[1] = coder->reps[0]; coder->reps[0] = distance; ++coder->match_price_count; } //////////////////// // Repeated match // //////////////////// static inline void rep_match(lzma_lzma1_encoder *coder, const uint32_t pos_state, const uint32_t rep, const uint32_t len) { if (rep == 0) { rc_bit(&coder->rc, &coder->is_rep0[coder->state], 0); rc_bit(&coder->rc, &coder->is_rep0_long[coder->state][pos_state], len != 1); } else { const uint32_t distance = coder->reps[rep]; rc_bit(&coder->rc, &coder->is_rep0[coder->state], 1); if (rep == 1) { rc_bit(&coder->rc, &coder->is_rep1[coder->state], 0); } else { rc_bit(&coder->rc, &coder->is_rep1[coder->state], 1); rc_bit(&coder->rc, &coder->is_rep2[coder->state], rep - 2); if (rep == 3) 
coder->reps[3] = coder->reps[2]; coder->reps[2] = coder->reps[1]; } coder->reps[1] = coder->reps[0]; coder->reps[0] = distance; } if (len == 1) { update_short_rep(coder->state); } else { length(&coder->rc, &coder->rep_len_encoder, pos_state, len, coder->fast_mode); update_long_rep(coder->state); } } ////////// // Main // ////////// static void encode_symbol(lzma_lzma1_encoder *coder, lzma_mf *mf, uint32_t back, uint32_t len, uint32_t position) { const uint32_t pos_state = position & coder->pos_mask; if (back == UINT32_MAX) { // Literal i.e. eight-bit byte assert(len == 1); rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 0); literal(coder, mf, position); } else { // Some type of match rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1); if (back < REPS) { // It's a repeated match i.e. the same distance // has been used earlier. rc_bit(&coder->rc, &coder->is_rep[coder->state], 1); rep_match(coder, pos_state, back, len); } else { // Normal match rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); match(coder, pos_state, back - REPS, len); } } assert(mf->read_ahead >= len); mf->read_ahead -= len; } static bool encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf) { assert(mf_position(mf) == 0); assert(coder->uncomp_size == 0); if (mf->read_pos == mf->read_limit) { if (mf->action == LZMA_RUN) return false; // We cannot do anything. // We are finishing (we cannot get here when flushing). assert(mf->write_pos == mf->read_pos); assert(mf->action == LZMA_FINISH); } else { // Do the actual initialization. The first LZMA symbol must // always be a literal. mf_skip(mf, 1); mf->read_ahead = 0; rc_bit(&coder->rc, &coder->is_match[0][0], 0); rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]); ++coder->uncomp_size; } // Initialization is done (except if empty file). 
coder->is_initialized = true; return true; } static void encode_eopm(lzma_lzma1_encoder *coder, uint32_t position) { const uint32_t pos_state = position & coder->pos_mask; rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1); rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); match(coder, pos_state, UINT32_MAX, MATCH_LEN_MIN); } /// Number of bytes that a single encoding loop in lzma_lzma_encode() can /// consume from the dictionary. This limit comes from lzma_lzma_optimum() /// and may need to be updated if that function is significantly modified. #define LOOP_INPUT_MAX (OPTS + 1) extern lzma_ret lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, uint32_t limit) { // Initialize the stream if no data has been encoded yet. if (!coder->is_initialized && !encode_init(coder, mf)) return LZMA_OK; // Encode pending output bytes from the range encoder. // At the start of the stream, encode_init() encodes one literal. // Later there can be pending output only with LZMA1 because LZMA2 // ensures that there is always enough output space. Thus when using // LZMA2, rc_encode() calls in this function will always return false. if (rc_encode(&coder->rc, out, out_pos, out_size)) { // We don't get here with LZMA2. assert(limit == UINT32_MAX); return LZMA_OK; } // If the range encoder was flushed in an earlier call to this // function but there wasn't enough output buffer space, those // bytes would have now been encoded by the above rc_encode() call // and the stream has now been finished. This can only happen with // LZMA1 as LZMA2 always provides enough output buffer space. if (coder->is_flushed) { assert(limit == UINT32_MAX); return LZMA_STREAM_END; } while (true) { // With LZMA2 we need to take care that compressed size of // a chunk doesn't get too big. // FIXME? Check if this could be improved. 
if (limit != UINT32_MAX && (mf->read_pos - mf->read_ahead >= limit || *out_pos + rc_pending(&coder->rc) >= LZMA2_CHUNK_MAX - LOOP_INPUT_MAX)) break; // Check that there is some input to process. if (mf->read_pos >= mf->read_limit) { if (mf->action == LZMA_RUN) return LZMA_OK; if (mf->read_ahead == 0) break; } // Get optimal match (repeat position and length). // Value ranges for pos: // - [0, REPS): repeated match // - [REPS, UINT32_MAX): // match at (pos - REPS) // - UINT32_MAX: not a match but a literal // Value ranges for len: // - [MATCH_LEN_MIN, MATCH_LEN_MAX] uint32_t len; uint32_t back; if (coder->fast_mode) lzma_lzma_optimum_fast(coder, mf, &back, &len); else lzma_lzma_optimum_normal(coder, mf, &back, &len, (uint32_t)(coder->uncomp_size)); encode_symbol(coder, mf, back, len, (uint32_t)(coder->uncomp_size)); // If output size limiting is active (out_limit != 0), check // if encoding this LZMA symbol would make the output size // exceed the specified limit. if (coder->out_limit != 0 && rc_encode_dummy( &coder->rc, coder->out_limit)) { // The most recent LZMA symbol would make the output // too big. Throw it away. rc_forget(&coder->rc); // FIXME: Tell the LZ layer to not read more input as // it would be waste of time. This doesn't matter if // output-size-limited encoding is done with a single // call though. break; } // This symbol will be encoded so update the uncompressed size. coder->uncomp_size += len; // Encode the LZMA symbol. if (rc_encode(&coder->rc, out, out_pos, out_size)) { // Once again, this can only happen with LZMA1. assert(limit == UINT32_MAX); return LZMA_OK; } } // Make the uncompressed size available to the application. if (coder->uncomp_size_ptr != NULL) *coder->uncomp_size_ptr = coder->uncomp_size; // LZMA2 doesn't use EOPM at LZMA level. // // Plain LZMA streams without EOPM aren't supported except when // output size limiting is enabled. 
if (coder->use_eopm) encode_eopm(coder, (uint32_t)(coder->uncomp_size)); // Flush the remaining bytes from the range encoder. rc_flush(&coder->rc); // Copy the remaining bytes to the output buffer. If there // isn't enough output space, we will copy out the remaining // bytes on the next call to this function. if (rc_encode(&coder->rc, out, out_pos, out_size)) { // This cannot happen with LZMA2. assert(limit == UINT32_MAX); coder->is_flushed = true; return LZMA_OK; } return LZMA_STREAM_END; } static lzma_ret lzma_encode(void *coder, lzma_mf *restrict mf, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size) { // Plain LZMA has no support for sync-flushing. if (unlikely(mf->action == LZMA_SYNC_FLUSH)) return LZMA_OPTIONS_ERROR; return lzma_lzma_encode(coder, mf, out, out_pos, out_size, UINT32_MAX); } static lzma_ret lzma_lzma_set_out_limit( void *coder_ptr, uint64_t *uncomp_size, uint64_t out_limit) { // Minimum output size is 5 bytes but that cannot hold any output // so we use 6 bytes. if (out_limit < 6) return LZMA_BUF_ERROR; lzma_lzma1_encoder *coder = coder_ptr; coder->out_limit = out_limit; coder->uncomp_size_ptr = uncomp_size; coder->use_eopm = false; return LZMA_OK; } //////////////////// // Initialization // //////////////////// static bool is_options_valid(const lzma_options_lzma *options) { // Validate some of the options. LZ encoder validates nice_len too // but we need a valid value here earlier. return is_lclppb_valid(options) && options->nice_len >= MATCH_LEN_MIN && options->nice_len <= MATCH_LEN_MAX && (options->mode == LZMA_MODE_FAST || options->mode == LZMA_MODE_NORMAL); } static void set_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options) { // LZ encoder initialization does the validation for these so we // don't need to validate here. 
lz_options->before_size = OPTS; lz_options->dict_size = options->dict_size; lz_options->after_size = LOOP_INPUT_MAX; lz_options->match_len_max = MATCH_LEN_MAX; lz_options->nice_len = my_max(mf_get_hash_bytes(options->mf), options->nice_len); lz_options->match_finder = options->mf; lz_options->depth = options->depth; lz_options->preset_dict = options->preset_dict; lz_options->preset_dict_size = options->preset_dict_size; return; } static void length_encoder_reset(lzma_length_encoder *lencoder, const uint32_t num_pos_states, const bool fast_mode) { bit_reset(lencoder->choice); bit_reset(lencoder->choice2); for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS); bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS); } bittree_reset(lencoder->high, LEN_HIGH_BITS); if (!fast_mode) for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) length_update_prices(lencoder, pos_state); return; } extern lzma_ret lzma_lzma_encoder_reset(lzma_lzma1_encoder *coder, const lzma_options_lzma *options) { if (!is_options_valid(options)) return LZMA_OPTIONS_ERROR; coder->pos_mask = (1U << options->pb) - 1; coder->literal_context_bits = options->lc; coder->literal_pos_mask = (1U << options->lp) - 1; // Range coder rc_reset(&coder->rc); // State coder->state = STATE_LIT_LIT; for (size_t i = 0; i < REPS; ++i) coder->reps[i] = 0; literal_init(coder->literal, options->lc, options->lp); // Bit encoders for (size_t i = 0; i < STATES; ++i) { for (size_t j = 0; j <= coder->pos_mask; ++j) { bit_reset(coder->is_match[i][j]); bit_reset(coder->is_rep0_long[i][j]); } bit_reset(coder->is_rep[i]); bit_reset(coder->is_rep0[i]); bit_reset(coder->is_rep1[i]); bit_reset(coder->is_rep2[i]); } for (size_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i) bit_reset(coder->dist_special[i]); // Bit tree encoders for (size_t i = 0; i < DIST_STATES; ++i) bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS); 
bittree_reset(coder->dist_align, ALIGN_BITS); // Length encoders length_encoder_reset(&coder->match_len_encoder, 1U << options->pb, coder->fast_mode); length_encoder_reset(&coder->rep_len_encoder, 1U << options->pb, coder->fast_mode); // Price counts are incremented every time appropriate probabilities // are changed. price counts are set to zero when the price tables // are updated, which is done when the appropriate price counts have // big enough value, and lzma_mf.read_ahead == 0 which happens at // least every OPTS (a few thousand) possible price count increments. // // By resetting price counts to UINT32_MAX / 2, we make sure that the // price tables will be initialized before they will be used (since // the value is definitely big enough), and that it is OK to increment // price counts without risk of integer overflow (since UINT32_MAX / 2 // is small enough). The current code doesn't increment price counts // before initializing price tables, but it maybe done in future if // we add support for saving the state between LZMA2 chunks. coder->match_price_count = UINT32_MAX / 2; coder->align_price_count = UINT32_MAX / 2; coder->opts_end_index = 0; coder->opts_current_index = 0; return LZMA_OK; } extern lzma_ret lzma_lzma_encoder_create(void **coder_ptr, const lzma_allocator *allocator, lzma_vli id, const lzma_options_lzma *options, lzma_lz_options *lz_options) { assert(id == LZMA_FILTER_LZMA1 || id == LZMA_FILTER_LZMA1EXT || id == LZMA_FILTER_LZMA2); // Allocate lzma_lzma1_encoder if it wasn't already allocated. if (*coder_ptr == NULL) { *coder_ptr = lzma_alloc(sizeof(lzma_lzma1_encoder), allocator); if (*coder_ptr == NULL) return LZMA_MEM_ERROR; } lzma_lzma1_encoder *coder = *coder_ptr; // Set compression mode. Note that we haven't validated the options // yet. Invalid options will get rejected by lzma_lzma_encoder_reset() // call at the end of this function. 
switch (options->mode) { case LZMA_MODE_FAST: coder->fast_mode = true; break; case LZMA_MODE_NORMAL: { coder->fast_mode = false; // Set dist_table_size. // Round the dictionary size up to next 2^n. // // Currently the maximum encoder dictionary size // is 1.5 GiB due to lz_encoder.c and here we need // to be below 2 GiB to make the rounded up value - // fit in an uint32_t and avoid an infite while-loop + // fit in an uint32_t and avoid an infinite while-loop // (and undefined behavior due to a too large shift). // So do the same check as in LZ encoder, // limiting to 1.5 GiB. if (options->dict_size > (UINT32_C(1) << 30) + (UINT32_C(1) << 29)) return LZMA_OPTIONS_ERROR; uint32_t log_size = 0; while ((UINT32_C(1) << log_size) < options->dict_size) ++log_size; coder->dist_table_size = log_size * 2; // Length encoders' price table size const uint32_t nice_len = my_max( mf_get_hash_bytes(options->mf), options->nice_len); coder->match_len_encoder.table_size = nice_len + 1 - MATCH_LEN_MIN; coder->rep_len_encoder.table_size = nice_len + 1 - MATCH_LEN_MIN; break; } default: return LZMA_OPTIONS_ERROR; } // We don't need to write the first byte as literal if there is // a non-empty preset dictionary. encode_init() wouldn't even work // if there is a non-empty preset dictionary, because encode_init() // assumes that position is zero and previous byte is also zero. coder->is_initialized = options->preset_dict != NULL && options->preset_dict_size > 0; coder->is_flushed = false; coder->uncomp_size = 0; coder->uncomp_size_ptr = NULL; - // Output size limitting is disabled by default. + // Output size limiting is disabled by default. coder->out_limit = 0; // Determine if end marker is wanted: // - It is never used with LZMA2. // - It is always used with LZMA_FILTER_LZMA1 (unless // lzma_lzma_set_out_limit() is called later). // - LZMA_FILTER_LZMA1EXT has a flag for it in the options. 
coder->use_eopm = (id == LZMA_FILTER_LZMA1); if (id == LZMA_FILTER_LZMA1EXT) { // Check if unsupported flags are present. if (options->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM) return LZMA_OPTIONS_ERROR; coder->use_eopm = (options->ext_flags & LZMA_LZMA1EXT_ALLOW_EOPM) != 0; // TODO? As long as there are no filters that change the size // of the data, it is enough to look at lzma_stream.total_in // after encoding has been finished to know the uncompressed // size of the LZMA1 stream. But in the future there could be // filters that change the size of the data and then total_in // doesn't work as the LZMA1 stream size might be different // due to another filter in the chain. The problem is simple // to solve: Add another flag to ext_flags and then set // coder->uncomp_size_ptr to the address stored in // lzma_options_lzma.reserved_ptr2 (or _ptr1). } set_lz_options(lz_options, options); return lzma_lzma_encoder_reset(coder, options); } static lzma_ret lzma_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator, lzma_vli id, const void *options, lzma_lz_options *lz_options) { lz->code = &lzma_encode; lz->set_out_limit = &lzma_lzma_set_out_limit; return lzma_lzma_encoder_create( &lz->coder, allocator, id, options, lz_options); } extern lzma_ret lzma_lzma_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters) { return lzma_lz_encoder_init( next, allocator, filters, &lzma_encoder_init); } extern uint64_t lzma_lzma_encoder_memusage(const void *options) { if (!is_options_valid(options)) return UINT64_MAX; lzma_lz_options lz_options; set_lz_options(&lz_options, options); const uint64_t lz_memusage = lzma_lz_encoder_memusage(&lz_options); if (lz_memusage == UINT64_MAX) return UINT64_MAX; return (uint64_t)(sizeof(lzma_lzma1_encoder)) + lz_memusage; } extern bool lzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte) { if (!is_lclppb_valid(options)) return true; *byte = (options->pb * 5 + options->lp) * 9 
+ options->lc; assert(*byte <= (4 * 5 + 4) * 9 + 8); return false; } #ifdef HAVE_ENCODER_LZMA1 extern lzma_ret lzma_lzma_props_encode(const void *options, uint8_t *out) { if (options == NULL) return LZMA_PROG_ERROR; const lzma_options_lzma *const opt = options; if (lzma_lzma_lclppb_encode(opt, out)) return LZMA_PROG_ERROR; write32le(out + 1, opt->dict_size); return LZMA_OK; } #endif extern LZMA_API(lzma_bool) lzma_mode_is_supported(lzma_mode mode) { return mode == LZMA_MODE_FAST || mode == LZMA_MODE_NORMAL; } diff --git a/contrib/xz/src/xz/args.c b/contrib/xz/src/xz/args.c index b831946f78b5..17e778c5db79 100644 --- a/contrib/xz/src/xz/args.c +++ b/contrib/xz/src/xz/args.c @@ -1,760 +1,763 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file args.c /// \brief Argument parsing /// /// \note Filter-specific options parsing is in options.c. // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "private.h" #include "getopt.h" #include bool opt_stdout = false; bool opt_force = false; bool opt_keep_original = false; bool opt_robot = false; bool opt_ignore_check = false; // We don't modify or free() this, but we need to assign it in some // non-const pointers. const char stdin_filename[] = "(stdin)"; /// Parse and set the memory usage limit for compression, decompression, /// and/or multithreaded decompression. static void parse_memlimit(const char *name, const char *name_percentage, const char *str, bool set_compress, bool set_decompress, bool set_mtdec) { bool is_percentage = false; uint64_t value; const size_t len = strlen(str); if (len > 0 && str[len - 1] == '%') { // Make a copy so that we can get rid of %. 
// // In the past str wasn't const and we modified it directly // but that modified argv[] and thus affected what was visible // in "ps auxf" or similar tools which was confusing. For // example, --memlimit=50% would show up as --memlimit=50 // since the percent sign was overwritten here. char *s = xstrdup(str); s[len - 1] = '\0'; is_percentage = true; value = str_to_uint64(name_percentage, s, 1, 100); free(s); } else { // On 32-bit systems, SIZE_MAX would make more sense than // UINT64_MAX. But use UINT64_MAX still so that scripts // that assume > 4 GiB values don't break. value = str_to_uint64(name, str, 0, UINT64_MAX); } hardware_memlimit_set(value, set_compress, set_decompress, set_mtdec, is_percentage); return; } static void parse_block_list(const char *str_const) { // We need a modifiable string in the for-loop. char *str_start = xstrdup(str_const); char *str = str_start; // It must be non-empty and not begin with a comma. if (str[0] == '\0' || str[0] == ',') message_fatal(_("%s: Invalid argument to --block-list"), str); // Count the number of comma-separated strings. size_t count = 1; for (size_t i = 0; str[i] != '\0'; ++i) if (str[i] == ',') ++count; // Prevent an unlikely integer overflow. if (count > SIZE_MAX / sizeof(uint64_t) - 1) message_fatal(_("%s: Too many arguments to --block-list"), str); // Allocate memory to hold all the sizes specified. // If --block-list was specified already, its value is forgotten. free(opt_block_list); opt_block_list = xmalloc((count + 1) * sizeof(uint64_t)); for (size_t i = 0; i < count; ++i) { // Locate the next comma and replace it with \0. char *p = strchr(str, ','); if (p != NULL) *p = '\0'; if (str[0] == '\0') { // There is no string, that is, a comma follows // another comma. Use the previous value. // // NOTE: We checked earlier that the first char // of the whole list cannot be a comma. 
assert(i > 0); opt_block_list[i] = opt_block_list[i - 1]; } else { opt_block_list[i] = str_to_uint64("block-list", str, 0, UINT64_MAX); // Zero indicates no more new Blocks. if (opt_block_list[i] == 0) { if (i + 1 != count) message_fatal(_("0 can only be used " "as the last element " "in --block-list")); opt_block_list[i] = UINT64_MAX; } } str = p + 1; } // Terminate the array. opt_block_list[count] = 0; free(str_start); return; } static void parse_real(args_info *args, int argc, char **argv) { enum { OPT_X86 = INT_MIN, OPT_POWERPC, OPT_IA64, OPT_ARM, OPT_ARMTHUMB, OPT_ARM64, OPT_SPARC, OPT_DELTA, OPT_LZMA1, OPT_LZMA2, OPT_SINGLE_STREAM, OPT_NO_SPARSE, OPT_FILES, OPT_FILES0, OPT_BLOCK_SIZE, OPT_BLOCK_LIST, OPT_MEM_COMPRESS, OPT_MEM_DECOMPRESS, OPT_MEM_MT_DECOMPRESS, OPT_NO_ADJUST, OPT_INFO_MEMORY, OPT_ROBOT, OPT_FLUSH_TIMEOUT, OPT_IGNORE_CHECK, }; static const char short_opts[] = "cC:defF:hHlkM:qQrS:tT:vVz0123456789"; static const struct option long_opts[] = { // Operation mode { "compress", no_argument, NULL, 'z' }, { "decompress", no_argument, NULL, 'd' }, { "uncompress", no_argument, NULL, 'd' }, { "test", no_argument, NULL, 't' }, { "list", no_argument, NULL, 'l' }, // Operation modifiers { "keep", no_argument, NULL, 'k' }, { "force", no_argument, NULL, 'f' }, { "stdout", no_argument, NULL, 'c' }, { "to-stdout", no_argument, NULL, 'c' }, { "single-stream", no_argument, NULL, OPT_SINGLE_STREAM }, { "no-sparse", no_argument, NULL, OPT_NO_SPARSE }, { "suffix", required_argument, NULL, 'S' }, // { "recursive", no_argument, NULL, 'r' }, // TODO { "files", optional_argument, NULL, OPT_FILES }, { "files0", optional_argument, NULL, OPT_FILES0 }, // Basic compression settings { "format", required_argument, NULL, 'F' }, { "check", required_argument, NULL, 'C' }, { "ignore-check", no_argument, NULL, OPT_IGNORE_CHECK }, { "block-size", required_argument, NULL, OPT_BLOCK_SIZE }, { "block-list", required_argument, NULL, OPT_BLOCK_LIST }, { "memlimit-compress", 
required_argument, NULL, OPT_MEM_COMPRESS }, { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS }, { "memlimit-mt-decompress", required_argument, NULL, OPT_MEM_MT_DECOMPRESS }, { "memlimit", required_argument, NULL, 'M' }, { "memory", required_argument, NULL, 'M' }, // Old alias { "no-adjust", no_argument, NULL, OPT_NO_ADJUST }, { "threads", required_argument, NULL, 'T' }, { "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT }, { "extreme", no_argument, NULL, 'e' }, { "fast", no_argument, NULL, '0' }, { "best", no_argument, NULL, '9' }, // Filters { "lzma1", optional_argument, NULL, OPT_LZMA1 }, { "lzma2", optional_argument, NULL, OPT_LZMA2 }, { "x86", optional_argument, NULL, OPT_X86 }, { "powerpc", optional_argument, NULL, OPT_POWERPC }, { "ia64", optional_argument, NULL, OPT_IA64 }, { "arm", optional_argument, NULL, OPT_ARM }, { "armthumb", optional_argument, NULL, OPT_ARMTHUMB }, { "arm64", optional_argument, NULL, OPT_ARM64 }, { "sparc", optional_argument, NULL, OPT_SPARC }, { "delta", optional_argument, NULL, OPT_DELTA }, // Other options { "quiet", no_argument, NULL, 'q' }, { "verbose", no_argument, NULL, 'v' }, { "no-warn", no_argument, NULL, 'Q' }, { "robot", no_argument, NULL, OPT_ROBOT }, { "info-memory", no_argument, NULL, OPT_INFO_MEMORY }, { "help", no_argument, NULL, 'h' }, { "long-help", no_argument, NULL, 'H' }, { "version", no_argument, NULL, 'V' }, { NULL, 0, NULL, 0 } }; int c; while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) != -1) { switch (c) { // Compression preset (also for decompression if --format=raw) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': coder_set_preset((uint32_t)(c - '0')); break; // --memlimit-compress case OPT_MEM_COMPRESS: parse_memlimit("memlimit-compress", "memlimit-compress%", optarg, true, false, false); break; // --memlimit-decompress case OPT_MEM_DECOMPRESS: parse_memlimit("memlimit-decompress", "memlimit-decompress%", 
optarg, false, true, false); break; // --memlimit-mt-decompress case OPT_MEM_MT_DECOMPRESS: parse_memlimit("memlimit-mt-decompress", "memlimit-mt-decompress%", optarg, false, false, true); break; // --memlimit case 'M': parse_memlimit("memlimit", "memlimit%", optarg, true, true, true); break; // --suffix case 'S': suffix_set(optarg); break; case 'T': { // Since xz 5.4.0: Ignore leading '+' first. const char *s = optarg; if (optarg[0] == '+') ++s; // The max is from src/liblzma/common/common.h. uint32_t t = str_to_uint64("threads", s, 0, 16384); // If leading '+' was used then use multi-threaded // mode even if exactly one thread was specified. if (t == 1 && optarg[0] == '+') t = UINT32_MAX; hardware_threads_set(t); break; } // --version case 'V': // This doesn't return. message_version(); // --stdout case 'c': opt_stdout = true; break; // --decompress case 'd': opt_mode = MODE_DECOMPRESS; break; // --extreme case 'e': coder_set_extreme(); break; // --force case 'f': opt_force = true; break; // --info-memory case OPT_INFO_MEMORY: // This doesn't return. hardware_memlimit_show(); // --help case 'h': // This doesn't return. message_help(false); // --long-help case 'H': // This doesn't return. message_help(true); // --list case 'l': opt_mode = MODE_LIST; break; // --keep case 'k': opt_keep_original = true; break; // --quiet case 'q': message_verbosity_decrease(); break; case 'Q': set_exit_no_warn(); break; case 't': opt_mode = MODE_TEST; break; // --verbose case 'v': message_verbosity_increase(); break; // --robot case OPT_ROBOT: opt_robot = true; // This is to make sure that floating point numbers // always have a dot as decimal separator. 
setlocale(LC_NUMERIC, "C"); break; case 'z': opt_mode = MODE_COMPRESS; break; // Filter setup case OPT_X86: coder_add_filter(LZMA_FILTER_X86, options_bcj(optarg)); break; case OPT_POWERPC: coder_add_filter(LZMA_FILTER_POWERPC, options_bcj(optarg)); break; case OPT_IA64: coder_add_filter(LZMA_FILTER_IA64, options_bcj(optarg)); break; case OPT_ARM: coder_add_filter(LZMA_FILTER_ARM, options_bcj(optarg)); break; case OPT_ARMTHUMB: coder_add_filter(LZMA_FILTER_ARMTHUMB, options_bcj(optarg)); break; case OPT_ARM64: coder_add_filter(LZMA_FILTER_ARM64, options_bcj(optarg)); break; case OPT_SPARC: coder_add_filter(LZMA_FILTER_SPARC, options_bcj(optarg)); break; case OPT_DELTA: coder_add_filter(LZMA_FILTER_DELTA, options_delta(optarg)); break; case OPT_LZMA1: coder_add_filter(LZMA_FILTER_LZMA1, options_lzma(optarg)); break; case OPT_LZMA2: coder_add_filter(LZMA_FILTER_LZMA2, options_lzma(optarg)); break; // Other // --format case 'F': { // Just in case, support both "lzma" and "alone" since // the latter was used for forward compatibility in // LZMA Utils 4.32.x. static const struct { char str[8]; enum format_type format; } types[] = { { "auto", FORMAT_AUTO }, { "xz", FORMAT_XZ }, { "lzma", FORMAT_LZMA }, { "alone", FORMAT_LZMA }, #ifdef HAVE_LZIP_DECODER { "lzip", FORMAT_LZIP }, #endif { "raw", FORMAT_RAW }, }; size_t i = 0; while (strcmp(types[i].str, optarg) != 0) if (++i == ARRAY_SIZE(types)) message_fatal(_("%s: Unknown file " "format type"), optarg); opt_format = types[i].format; break; } // --check case 'C': { static const struct { char str[8]; lzma_check check; } types[] = { { "none", LZMA_CHECK_NONE }, { "crc32", LZMA_CHECK_CRC32 }, { "crc64", LZMA_CHECK_CRC64 }, { "sha256", LZMA_CHECK_SHA256 }, }; size_t i = 0; while (strcmp(types[i].str, optarg) != 0) { if (++i == ARRAY_SIZE(types)) message_fatal(_("%s: Unsupported " "integrity " "check type"), optarg); } // Use a separate check in case we are using different // liblzma than what was used to compile us. 
if (!lzma_check_is_supported(types[i].check)) message_fatal(_("%s: Unsupported integrity " "check type"), optarg); coder_set_check(types[i].check); break; } case OPT_IGNORE_CHECK: opt_ignore_check = true; break; case OPT_BLOCK_SIZE: opt_block_size = str_to_uint64("block-size", optarg, 0, LZMA_VLI_MAX); break; case OPT_BLOCK_LIST: { parse_block_list(optarg); break; } case OPT_SINGLE_STREAM: opt_single_stream = true; break; case OPT_NO_SPARSE: io_no_sparse(); break; case OPT_FILES: args->files_delim = '\n'; // Fall through case OPT_FILES0: if (args->files_name != NULL) message_fatal(_("Only one file can be " "specified with `--files' " "or `--files0'.")); if (optarg == NULL) { args->files_name = stdin_filename; args->files_file = stdin; } else { args->files_name = optarg; args->files_file = fopen(optarg, c == OPT_FILES ? "r" : "rb"); if (args->files_file == NULL) - message_fatal("%s: %s", optarg, + // TRANSLATORS: This is a translatable + // string because French needs a space + // before the colon ("%s : %s"). + message_fatal(_("%s: %s"), optarg, strerror(errno)); } break; case OPT_NO_ADJUST: opt_auto_adjust = false; break; case OPT_FLUSH_TIMEOUT: opt_flush_timeout = str_to_uint64("flush-timeout", optarg, 0, UINT64_MAX); break; default: message_try_help(); tuklib_exit(E_ERROR, E_ERROR, false); } } return; } static void parse_environment(args_info *args, char *argv0, const char *varname) { char *env = getenv(varname); if (env == NULL) return; // We modify the string, so make a copy of it. env = xstrdup(env); // Calculate the number of arguments in env. argc stats at one // to include space for the program name. int argc = 1; bool prev_was_space = true; for (size_t i = 0; env[i] != '\0'; ++i) { // NOTE: Cast to unsigned char is needed so that correct // value gets passed to isspace(), which expects // unsigned char cast to int. Casting to int is done // automatically due to integer promotion, but we need to // force char to unsigned char manually. 
Otherwise 8-bit // characters would get promoted to wrong value if // char is signed. if (isspace((unsigned char)env[i])) { prev_was_space = true; } else if (prev_was_space) { prev_was_space = false; // Keep argc small enough to fit into a signed int // and to keep it usable for memory allocation. if (++argc == my_min( INT_MAX, SIZE_MAX / sizeof(char *))) message_fatal(_("The environment variable " "%s contains too many " "arguments"), varname); } } // Allocate memory to hold pointers to the arguments. Add one to get // space for the terminating NULL (if some systems happen to need it). char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); argv[0] = argv0; argv[argc] = NULL; // Go through the string again. Split the arguments using '\0' // characters and add pointers to the resulting strings to argv. argc = 1; prev_was_space = true; for (size_t i = 0; env[i] != '\0'; ++i) { if (isspace((unsigned char)env[i])) { prev_was_space = true; env[i] = '\0'; } else if (prev_was_space) { prev_was_space = false; argv[argc++] = env + i; } } // Parse the argument list we got from the environment. All non-option // arguments i.e. filenames are ignored. parse_real(args, argc, argv); // Reset the state of the getopt_long() so that we can parse the // command line options too. There are two incompatible ways to // do it. #ifdef HAVE_OPTRESET // BSD optind = 1; optreset = 1; #else // GNU, Solaris optind = 0; #endif // We don't need the argument list from environment anymore. free(argv); free(env); return; } extern void args_parse(args_info *args, int argc, char **argv) { // Initialize those parts of *args that we need later. args->files_name = NULL; args->files_file = NULL; args->files_delim = '\0'; // Check how we were called. { // Remove the leading path name, if any. const char *name = strrchr(argv[0], '/'); if (name == NULL) name = argv[0]; else ++name; // NOTE: It's possible that name[0] is now '\0' if argv[0] // is weird, but it doesn't matter here. 
// Look for full command names instead of substrings like // "un", "cat", and "lz" to reduce possibility of false // positives when the programs have been renamed. if (strstr(name, "xzcat") != NULL) { opt_mode = MODE_DECOMPRESS; opt_stdout = true; } else if (strstr(name, "unxz") != NULL) { opt_mode = MODE_DECOMPRESS; } else if (strstr(name, "lzcat") != NULL) { opt_format = FORMAT_LZMA; opt_mode = MODE_DECOMPRESS; opt_stdout = true; } else if (strstr(name, "unlzma") != NULL) { opt_format = FORMAT_LZMA; opt_mode = MODE_DECOMPRESS; } else if (strstr(name, "lzma") != NULL) { opt_format = FORMAT_LZMA; } } // First the flags from the environment parse_environment(args, argv[0], "XZ_DEFAULTS"); parse_environment(args, argv[0], "XZ_OPT"); // Then from the command line parse_real(args, argc, argv); // If encoder or decoder support was omitted at build time, // show an error now so that the rest of the code can rely on // that whatever is in opt_mode is also supported. #ifndef HAVE_ENCODERS if (opt_mode == MODE_COMPRESS) message_fatal(_("Compression support was disabled " "at build time")); #endif #ifndef HAVE_DECODERS // Even MODE_LIST cannot work without decoder support so MODE_COMPRESS // is the only valid choice. if (opt_mode != MODE_COMPRESS) message_fatal(_("Decompression support was disabled " "at build time")); #endif #ifdef HAVE_LZIP_DECODER if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_LZIP) message_fatal(_("Compression of lzip files (.lz) " "is not supported")); #endif // Never remove the source file when the destination is not on disk. // In test mode the data is written nowhere, but setting opt_stdout // will make the rest of the code behave well. if (opt_stdout || opt_mode == MODE_TEST) { opt_keep_original = true; opt_stdout = true; } // When compressing, if no --format flag was used, or it // was --format=auto, we compress to the .xz format. 
if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) opt_format = FORMAT_XZ; // Compression settings need to be validated (options themselves and // their memory usage) when compressing to any file format. It has to // be done also when uncompressing raw data, since for raw decoding // the options given on the command line are used to know what kind // of raw data we are supposed to decode. if (opt_mode == MODE_COMPRESS || (opt_format == FORMAT_RAW && opt_mode != MODE_LIST)) coder_set_compression_settings(); // If raw format is used and a custom suffix is not provided, // then only stdout mode can be used when compressing or decompressing. if (opt_format == FORMAT_RAW && !suffix_is_set() && !opt_stdout && (opt_mode == MODE_COMPRESS || opt_mode == MODE_DECOMPRESS)) message_fatal(_("With --format=raw, --suffix=.SUF is " "required unless writing to stdout")); // If no filenames are given, use stdin. if (argv[optind] == NULL && args->files_name == NULL) { // We don't modify or free() the "-" constant. The caller // modifies this so don't make the struct itself const. static char *names_stdin[2] = { (char *)"-", NULL }; args->arg_names = names_stdin; args->arg_count = 1; } else { // We got at least one filename from the command line, or // --files or --files0 was specified. args->arg_names = argv + optind; args->arg_count = (unsigned int)(argc - optind); } return; } #ifndef NDEBUG extern void args_free(void) { free(opt_block_list); return; } #endif diff --git a/contrib/xz/src/xz/coder.c b/contrib/xz/src/xz/coder.c index 05f228882b73..589ec072e63c 100644 --- a/contrib/xz/src/xz/coder.c +++ b/contrib/xz/src/xz/coder.c @@ -1,1108 +1,1108 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file coder.c /// \brief Compresses or uncompresses a file // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. 
// /////////////////////////////////////////////////////////////////////////////// #include "private.h" /// Return value type for coder_init(). enum coder_init_ret { CODER_INIT_NORMAL, CODER_INIT_PASSTHRU, CODER_INIT_ERROR, }; enum operation_mode opt_mode = MODE_COMPRESS; enum format_type opt_format = FORMAT_AUTO; bool opt_auto_adjust = true; bool opt_single_stream = false; uint64_t opt_block_size = 0; uint64_t *opt_block_list = NULL; /// Stream used to communicate with liblzma static lzma_stream strm = LZMA_STREAM_INIT; /// Filters needed for all encoding all formats, and also decoding in raw data static lzma_filter filters[LZMA_FILTERS_MAX + 1]; /// Input and output buffers static io_buf in_buf; static io_buf out_buf; /// Number of filters. Zero indicates that we are using a preset. static uint32_t filters_count = 0; /// Number of the preset (0-9) static uint32_t preset_number = LZMA_PRESET_DEFAULT; /// Integrity check type static lzma_check check; /// This becomes false if the --check=CHECK option is used. static bool check_default = true; /// Indicates if unconsumed input is allowed to remain after /// decoding has successfully finished. This is set for each file /// in coder_init(). static bool allow_trailing_input; #ifdef MYTHREAD_ENABLED static lzma_mt mt_options = { .flags = 0, .timeout = 300, .filters = filters, }; #endif extern void coder_set_check(lzma_check new_check) { check = new_check; check_default = false; return; } static void forget_filter_chain(void) { // Setting a preset makes us forget a possibly defined custom // filter chain. 
// (Continuation of the function whose opening lines precede this point.)
	while (filters_count > 0) {
		--filters_count;
		free(filters[filters_count].options);
		filters[filters_count].options = NULL;
	}

	return;
}


/// Replace the preset level bits of preset_number with new_preset and
/// call forget_filter_chain(). Bits outside LZMA_PRESET_LEVEL_MASK
/// are left as they were.
extern void
coder_set_preset(uint32_t new_preset)
{
	preset_number &= ~LZMA_PRESET_LEVEL_MASK;
	preset_number |= new_preset;
	forget_filter_chain();
	return;
}


/// Set the LZMA_PRESET_EXTREME flag in preset_number and
/// call forget_filter_chain().
extern void
coder_set_extreme(void)
{
	preset_number |= LZMA_PRESET_EXTREME;
	forget_filter_chain();
	return;
}


/// Append a filter (id and its options pointer) to the filters array.
/// It is a fatal error if the array already holds LZMA_FILTERS_MAX entries.
/// The options pointer is stored as is; it is not copied here.
extern void
coder_add_filter(lzma_vli id, void *options)
{
	if (filters_count == LZMA_FILTERS_MAX)
		message_fatal(_("Maximum number of filters is four"));

	filters[filters_count].id = id;
	filters[filters_count].options = options;
	++filters_count;

	// Setting a custom filter chain makes us forget the preset options.
	// This makes a difference if one specifies e.g. "xz -9 --lzma2 -e"
	// where the custom filter chain resets the preset level back to
	// the default 6, making the example equivalent to "xz -6e".
	preset_number = LZMA_PRESET_DEFAULT;

	return;
}


/// Report that the memory usage limit is too low for the filter setup,
/// show how much memory would be needed (memory_usage), and exit via
/// tuklib_exit(). This function doesn't return.
static void lzma_attribute((__noreturn__))
memlimit_too_small(uint64_t memory_usage)
{
	message(V_ERROR, _("Memory usage limit is too low for the given "
			"filter setup."));
	message_mem_needed(V_ERROR, memory_usage);
	tuklib_exit(E_ERROR, E_ERROR, false);
}


/// Finalize the filter chain and validate it against the selected file
/// format and the memory usage limit.
///
/// The steps, in order:
///   - pick the integrity check if the default is in use;
///   - if no custom filters were given, build a one-filter chain from
///     the preset;
///   - terminate the filters array and reject chains that the .lzma or
///     .xz format cannot hold;
///   - enforce the --flush-timeout restrictions;
///   - compute the memory usage and, if it exceeds the limit, first
///     reduce the number of threads, then switch to single-threaded mode,
///     and finally shrink the LZMA1/LZMA2 dictionary size. If none of
///     that is allowed or enough, memlimit_too_small() ends the program.
extern void
coder_set_compression_settings(void)
{
#ifdef HAVE_LZIP_DECODER
	// .lz compression isn't supported.
	assert(opt_format != FORMAT_LZIP);
#endif

	// The default check type is CRC64, but fallback to CRC32
	// if CRC64 isn't supported by the copy of liblzma we are
	// using. CRC32 is always supported.
	if (check_default) {
		check = LZMA_CHECK_CRC64;
		if (!lzma_check_is_supported(check))
			check = LZMA_CHECK_CRC32;
	}

	// Options for LZMA1 or LZMA2 in case we are using a preset.
	// This is static because filters[0].options keeps pointing to it
	// after this function has returned.
	static lzma_options_lzma opt_lzma;

	if (filters_count == 0) {
		// We are using a preset. This is not a good idea in raw mode
		// except when playing around with things. Different versions
		// of this software may use different options in presets, and
		// thus make uncompressing the raw data difficult.
		if (opt_format == FORMAT_RAW) {
			// The message is shown only if warnings are allowed
			// but the exit status isn't changed.
			message(V_WARNING, _("Using a preset in raw mode "
					"is discouraged."));
			message(V_WARNING, _("The exact options of the "
					"presets may vary between software "
					"versions."));
		}

		// Get the preset for LZMA1 or LZMA2.
		if (lzma_lzma_preset(&opt_lzma, preset_number))
			message_bug();

		// Use LZMA2 except with --format=lzma we use LZMA1.
		filters[0].id = opt_format == FORMAT_LZMA
				? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2;
		filters[0].options = &opt_lzma;
		filters_count = 1;
	}

	// Terminate the filter options array.
	filters[filters_count].id = LZMA_VLI_UNKNOWN;

	// If we are using the .lzma format, allow exactly one filter
	// which has to be LZMA1.
	if (opt_format == FORMAT_LZMA && (filters_count != 1
			|| filters[0].id != LZMA_FILTER_LZMA1))
		message_fatal(_("The .lzma format supports only "
				"the LZMA1 filter"));

	// If we are using the .xz format, make sure that there is no LZMA1
	// filter to prevent LZMA_PROG_ERROR.
	if (opt_format == FORMAT_XZ)
		for (size_t i = 0; i < filters_count; ++i)
			if (filters[i].id == LZMA_FILTER_LZMA1)
				message_fatal(_("LZMA1 cannot be used "
						"with the .xz format"));

	// Print the selected filter chain.
	message_filters_show(V_DEBUG, filters);

	// The --flush-timeout option requires LZMA_SYNC_FLUSH support
	// from the filter chain. Currently threaded encoder doesn't support
	// LZMA_SYNC_FLUSH so single-threaded mode must be used.
	if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) {
		for (size_t i = 0; i < filters_count; ++i) {
			switch (filters[i].id) {
			case LZMA_FILTER_LZMA2:
			case LZMA_FILTER_DELTA:
				break;

			default:
				message_fatal(_("The filter chain is "
					"incompatible with --flush-timeout"));
			}
		}

		if (hardware_threads_is_mt()) {
			message(V_WARNING, _("Switching to single-threaded "
					"mode due to --flush-timeout"));
			hardware_threads_set(1);
		}
	}

	// Get the memory usage. Note that if --format=raw was used,
	// we can be decompressing.
	//
	// If multithreaded .xz compression is done, this value will be
	// replaced.
	uint64_t memory_limit = hardware_memlimit_get(opt_mode);
	uint64_t memory_usage = UINT64_MAX;
	if (opt_mode == MODE_COMPRESS) {
#ifdef HAVE_ENCODERS
#	ifdef MYTHREAD_ENABLED
		if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) {
			memory_limit = hardware_memlimit_mtenc_get();
			mt_options.threads = hardware_threads_get();
			mt_options.block_size = opt_block_size;
			mt_options.check = check;
			memory_usage = lzma_stream_encoder_mt_memusage(
					&mt_options);
			if (memory_usage != UINT64_MAX)
				message(V_DEBUG, _("Using up to %" PRIu32
						" threads."),
						mt_options.threads);
		} else
#	endif
		{
			memory_usage = lzma_raw_encoder_memusage(filters);
		}
#endif
	} else {
#ifdef HAVE_DECODERS
		memory_usage = lzma_raw_decoder_memusage(filters);
#endif
	}

	// UINT64_MAX is the initial value and it is also what is treated
	// as a failure from the memusage functions above.
	if (memory_usage == UINT64_MAX)
		message_fatal(_("Unsupported filter chain or filter options"));

	// Print memory usage info before possible dictionary
	// size auto-adjusting.
	//
	// NOTE: If only encoder support was built, we cannot show
	// what the decoder memory usage will be.
	message_mem_needed(V_DEBUG, memory_usage);
#ifdef HAVE_DECODERS
	if (opt_mode == MODE_COMPRESS) {
		const uint64_t decmem = lzma_raw_decoder_memusage(filters);
		if (decmem != UINT64_MAX)
			message(V_DEBUG, _("Decompression will need "
				"%s MiB of memory."), uint64_to_str(
					round_up_to_mib(decmem), 0));
	}
#endif

	if (memory_usage <= memory_limit)
		return;

	// With --format=raw settings are never adjusted to meet
	// the memory usage limit.
	if (opt_format == FORMAT_RAW)
		memlimit_too_small(memory_usage);

	assert(opt_mode == MODE_COMPRESS);

#ifdef HAVE_ENCODERS
#	ifdef MYTHREAD_ENABLED
	if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) {
		// Try to reduce the number of threads before
		// adjusting the compression settings down.
		while (mt_options.threads > 1) {
			// Reduce the number of threads by one and check
			// the memory usage.
			--mt_options.threads;
			memory_usage = lzma_stream_encoder_mt_memusage(
					&mt_options);
			if (memory_usage == UINT64_MAX)
				message_bug();

			if (memory_usage <= memory_limit) {
				// The memory usage is now low enough.
				message(V_WARNING, _("Reduced the number of "
					"threads from %s to %s to not exceed "
					"the memory usage limit of %s MiB"),
					uint64_to_str(
						hardware_threads_get(), 0),
					uint64_to_str(mt_options.threads, 1),
					uint64_to_str(round_up_to_mib(
						memory_limit), 2));
				return;
			}
		}

		// If the memory usage limit is only a soft limit (automatic
		// number of threads and no --memlimit-compress), the limit
		// is only used to reduce the number of threads and once at
		// just one thread, the limit is completely ignored. This
		// way -T0 won't use an insane amount of memory but at the
		// same time the soft limit will never make xz fail and never
		// make xz change settings that would affect the compressed
		// output.
		if (hardware_memlimit_mtenc_is_default()) {
			message(V_WARNING, _("Reduced the number of threads "
				"from %s to one. The automatic memory usage "
				"limit of %s MiB is still being exceeded. "
				"%s MiB of memory is required. "
				"Continuing anyway."),
				uint64_to_str(hardware_threads_get(), 0),
				uint64_to_str(
					round_up_to_mib(memory_limit), 1),
				uint64_to_str(
					round_up_to_mib(memory_usage), 2));
			return;
		}

		// If --no-adjust was used, we cannot drop to single-threaded
		// mode since it produces different compressed output.
		//
		// NOTE: In xz 5.2.x, --no-adjust also prevented reducing
		// the number of threads. This changed in 5.3.3alpha.
		if (!opt_auto_adjust)
			memlimit_too_small(memory_usage);

		// Switch to single-threaded mode. It uses
		// less memory than using one thread in
		// the multithreaded mode but the output
		// is also different.
		hardware_threads_set(1);
		memory_usage = lzma_raw_encoder_memusage(filters);
		message(V_WARNING, _("Switching to single-threaded mode "
			"to not exceed the memory usage limit of %s MiB"),
			uint64_to_str(round_up_to_mib(memory_limit), 0));
	}
#	endif

	if (memory_usage <= memory_limit)
		return;

	// Don't adjust LZMA2 or LZMA1 dictionary size if --no-adjust
	// was specified as that would change the compressed output.
	if (!opt_auto_adjust)
		memlimit_too_small(memory_usage);

	// Look for the last filter if it is LZMA2 or LZMA1, so we can make
	// it use less RAM. With other filters we don't know what to do.
	// Reaching the LZMA_VLI_UNKNOWN terminator means that the chain
	// has no LZMA1/LZMA2 filter at all.
	size_t i = 0;
	while (filters[i].id != LZMA_FILTER_LZMA2
			&& filters[i].id != LZMA_FILTER_LZMA1) {
		if (filters[i].id == LZMA_VLI_UNKNOWN)
			memlimit_too_small(memory_usage);

		++i;
	}

	// Decrease the dictionary size until we meet the memory
	// usage limit. First round down to full mebibytes.
	lzma_options_lzma *opt = filters[i].options;
	const uint32_t orig_dict_size = opt->dict_size;
	opt->dict_size &= ~((UINT32_C(1) << 20) - 1);
	while (true) {
		// If it is below 1 MiB, auto-adjusting failed. We could be
		// more sophisticated and scale it down even more, but let's
		// see if many complain about this version.
		//
		// FIXME: Displays the scaled memory usage instead
		// of the original.
		if (opt->dict_size < (UINT32_C(1) << 20))
			memlimit_too_small(memory_usage);

		memory_usage = lzma_raw_encoder_memusage(filters);
		if (memory_usage == UINT64_MAX)
			message_bug();

		// Accept it if it is low enough.
		if (memory_usage <= memory_limit)
			break;

		// Otherwise 1 MiB down and try again. I hope this
		// isn't too slow a method for cases where the original
		// dict_size is very big.
		opt->dict_size -= UINT32_C(1) << 20;
	}

	// Tell the user that we decreased the dictionary size.
	message(V_WARNING, _("Adjusted LZMA%c dictionary size "
			"from %s MiB to %s MiB to not exceed "
			"the memory usage limit of %s MiB"),
			filters[i].id == LZMA_FILTER_LZMA2
				? '2' : '1',
			uint64_to_str(orig_dict_size >> 20, 0),
			uint64_to_str(opt->dict_size >> 20, 1),
			uint64_to_str(round_up_to_mib(memory_limit), 2));
#endif

	return;
}


#ifdef HAVE_DECODERS
/// Return true if the data in in_buf seems to be in the .xz format.
static bool
is_format_xz(void)
{
	// Specify the magic as hex to be compatible with EBCDIC systems.
	static const uint8_t magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 };
	return strm.avail_in >= sizeof(magic)
			&& memcmp(in_buf.u8, magic, sizeof(magic)) == 0;
}


/// Return true if the data in in_buf seems to be in the .lzma format.
///
/// There are no magic bytes to compare against, so this decodes the
/// header fields and rejects values that look implausible.
static bool
is_format_lzma(void)
{
	// The .lzma header is 13 bytes.
	if (strm.avail_in < 13)
		return false;

	// Decode the LZMA1 properties.
	lzma_filter filter = { .id = LZMA_FILTER_LZMA1 };
	if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK)
		return false;

	// A hack to ditch tons of false positives: We allow only dictionary
	// sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone
	// created only files with 2^n, but accepts any dictionary size.
	// If someone complains, this will be reconsidered.
	//
	// Only dict_size is needed from the decoded options, so they are
	// freed right after it has been copied.
	lzma_options_lzma *opt = filter.options;
	const uint32_t dict_size = opt->dict_size;
	free(opt);

	if (dict_size != UINT32_MAX) {
		uint32_t d = dict_size - 1;
		d |= d >> 2;
		d |= d >> 3;
		d |= d >> 4;
		d |= d >> 8;
		d |= d >> 16;
		++d;
		if (d != dict_size || dict_size == 0)
			return false;
	}

	// Another hack to ditch false positives: Assume that if the
	// uncompressed size is known, it must be less than 256 GiB.
	// Again, if someone complains, this will be reconsidered.
	//
	// The size field is the eight bytes after the five property bytes.
	// It is assembled here with the least significant byte first.
	uint64_t uncompressed_size = 0;
	for (size_t i = 0; i < 8; ++i)
		uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8);

	if (uncompressed_size != UINT64_MAX
			&& uncompressed_size > (UINT64_C(1) << 38))
		return false;

	return true;
}


#ifdef HAVE_LZIP_DECODER
/// Return true if the data in in_buf seems to be in the .lz format.
static bool
is_format_lzip(void)
{
	// The magic bytes are given as hex; they are the ASCII codes
	// of "LZIP".
	static const uint8_t magic[4] = { 0x4C, 0x5A, 0x49, 0x50 };
	return strm.avail_in >= sizeof(magic)
			&& memcmp(in_buf.u8, magic, sizeof(magic)) == 0;
}
#endif
#endif


/// Detect the input file type (for now, this is done only when
/// decompressing), and initialize an appropriate coder. Return value
/// indicates if a normal liblzma-based coder was initialized
/// (CODER_INIT_NORMAL), if passthru mode should be used
/// (CODER_INIT_PASSTHRU), or if an error occurred (CODER_INIT_ERROR).
///
/// On error, a message naming pair->src_name has already been shown
/// when this returns.
static enum coder_init_ret
coder_init(file_pair *pair)
{
	lzma_ret ret = LZMA_PROG_ERROR;

	// In most cases if there is input left when coding finishes,
	// something has gone wrong. Exceptions are --single-stream
	// and decoding .lz files which can contain trailing non-.lz data.
	// These will be handled later in this function.
	allow_trailing_input = false;

	if (opt_mode == MODE_COMPRESS) {
#ifdef HAVE_ENCODERS
		switch (opt_format) {
		case FORMAT_AUTO:
			// args.c ensures this.
			assert(0);
			break;

		case FORMAT_XZ:
#	ifdef MYTHREAD_ENABLED
			if (hardware_threads_is_mt())
				ret = lzma_stream_encoder_mt(
						&strm, &mt_options);
			else
#	endif
				ret = lzma_stream_encoder(
						&strm, filters, check);
			break;

		case FORMAT_LZMA:
			ret = lzma_alone_encoder(&strm, filters[0].options);
			break;

#	ifdef HAVE_LZIP_DECODER
		case FORMAT_LZIP:
			// args.c should disallow this.
			assert(0);
			ret = LZMA_PROG_ERROR;
			break;
#	endif

		case FORMAT_RAW:
			ret = lzma_raw_encoder(&strm, filters);
			break;
		}
#endif
	} else {
#ifdef HAVE_DECODERS
		uint32_t flags = 0;

		// It seems silly to warn about unsupported check if the
		// check won't be verified anyway due to --ignore-check.
		if (opt_ignore_check)
			flags |= LZMA_IGNORE_CHECK;
		else
			flags |= LZMA_TELL_UNSUPPORTED_CHECK;

		if (opt_single_stream)
			allow_trailing_input = true;
		else
			flags |= LZMA_CONCATENATED;

		// We abuse FORMAT_AUTO to indicate unknown file format,
		// for which we may consider passthru mode.
		enum format_type init_format = FORMAT_AUTO;

		switch (opt_format) {
		case FORMAT_AUTO:
			// .lz is checked before .lzma since .lzma detection
			// is more complicated (no magic bytes).
			if (is_format_xz())
				init_format = FORMAT_XZ;
#	ifdef HAVE_LZIP_DECODER
			else if (is_format_lzip())
				init_format = FORMAT_LZIP;
#	endif
			else if (is_format_lzma())
				init_format = FORMAT_LZMA;
			break;

		case FORMAT_XZ:
			if (is_format_xz())
				init_format = FORMAT_XZ;
			break;

		case FORMAT_LZMA:
			if (is_format_lzma())
				init_format = FORMAT_LZMA;
			break;

#	ifdef HAVE_LZIP_DECODER
		case FORMAT_LZIP:
			if (is_format_lzip())
				init_format = FORMAT_LZIP;
			break;
#	endif

		case FORMAT_RAW:
			init_format = FORMAT_RAW;
			break;
		}

		switch (init_format) {
		case FORMAT_AUTO:
			// Unknown file format. If --decompress --stdout
			// --force have been given, then we copy the input
			// as is to stdout. Checking for MODE_DECOMPRESS
			// is needed, because we don't want to use
			// passthru mode with --test.
			if (opt_mode == MODE_DECOMPRESS
					&& opt_stdout && opt_force) {
				// These are needed for progress info.
				strm.total_in = 0;
				strm.total_out = 0;
				return CODER_INIT_PASSTHRU;
			}

			ret = LZMA_FORMAT_ERROR;
			break;

		case FORMAT_XZ:
#	ifdef MYTHREAD_ENABLED
			mt_options.flags = flags;

			mt_options.threads = hardware_threads_get();
			mt_options.memlimit_stop
				= hardware_memlimit_get(MODE_DECOMPRESS);

			// If single-threaded mode was requested, set the
			// memlimit for threading to zero. This forces the
			// decoder to use single-threaded mode which matches
			// the behavior of lzma_stream_decoder().
			//
			// Otherwise use the limit for threaded decompression
			// which has a sane default (users are still free to
			// make it insanely high though).
			mt_options.memlimit_threading
					= mt_options.threads == 1
					? 0 : hardware_memlimit_mtdec_get();

			ret = lzma_stream_decoder_mt(&strm, &mt_options);
#	else
			ret = lzma_stream_decoder(&strm,
					hardware_memlimit_get(
						MODE_DECOMPRESS), flags);
#	endif
			break;

		case FORMAT_LZMA:
			ret = lzma_alone_decoder(&strm,
					hardware_memlimit_get(
						MODE_DECOMPRESS));
			break;

#	ifdef HAVE_LZIP_DECODER
		case FORMAT_LZIP:
			allow_trailing_input = true;
			ret = lzma_lzip_decoder(&strm,
					hardware_memlimit_get(
						MODE_DECOMPRESS), flags);
			break;
#	endif

		case FORMAT_RAW:
			// Memory usage has already been checked in
			// coder_set_compression_settings().
			ret = lzma_raw_decoder(&strm, filters);
			break;
		}

		// Try to decode the headers. This will catch too low
		// memory usage limit in case it happens in the first
		// Block of the first Stream, which is where it very
		// probably will happen if it is going to happen.
		//
		// This will also catch unsupported check type which
		// we treat as a warning only. If there are empty
		// concatenated Streams with unsupported check type then
		// the message can be shown more than once here. The loop
		// is used in case there is first a warning about
		// unsupported check type and then the first Block
		// would exceed the memlimit.
		if (ret == LZMA_OK && init_format != FORMAT_RAW) {
			strm.next_out = NULL;
			strm.avail_out = 0;
			while ((ret = lzma_code(&strm, LZMA_RUN))
					== LZMA_UNSUPPORTED_CHECK)
-				message_warning("%s: %s", pair->src_name,
+				message_warning(_("%s: %s"), pair->src_name,
					message_strm(ret));

			// With --single-stream lzma_code won't wait for
			// LZMA_FINISH and thus it can return LZMA_STREAM_END
			// if the file has no uncompressed data inside.
			// So treat LZMA_STREAM_END as LZMA_OK here.
			// When lzma_code() is called again in coder_normal()
			// it will return LZMA_STREAM_END again.
			if (ret == LZMA_STREAM_END)
				ret = LZMA_OK;
		}
#endif
	}

	if (ret != LZMA_OK) {
-		message_error("%s: %s", pair->src_name, message_strm(ret));
+		message_error(_("%s: %s"), pair->src_name, message_strm(ret));
		if (ret == LZMA_MEMLIMIT_ERROR)
			message_mem_needed(V_ERROR, lzma_memusage(&strm));

		return CODER_INIT_ERROR;
	}

	return CODER_INIT_NORMAL;
}


/// Resolve conflicts between opt_block_size and opt_block_list in single
/// threaded mode. We want to default to opt_block_list, except when it is
/// larger than opt_block_size. If this is the case for the current Block
/// at *list_pos, then we break into smaller Blocks. Otherwise advance
/// to the next Block in opt_block_list, and break apart if needed.
///
/// All three arguments are updated in place:
///   - *block_remaining gets the size of the next Block to encode;
///   - *next_block_remaining gets what is still left of a split Block
///     after that (zero if nothing is left);
///   - *list_pos is advanced when moving to the next list entry.
static void
split_block(uint64_t *block_remaining,
		uint64_t *next_block_remaining,
		size_t *list_pos)
{
	if (*next_block_remaining > 0) {
		// The Block at *list_pos has previously been split up.
		assert(!hardware_threads_is_mt());
		assert(opt_block_size > 0);
		assert(opt_block_list != NULL);

		if (*next_block_remaining > opt_block_size) {
			// We have to split the current Block at *list_pos
			// into another opt_block_size length Block.
			*block_remaining = opt_block_size;
		} else {
			// This is the last remaining split Block for the
			// Block at *list_pos.
			*block_remaining = *next_block_remaining;
		}

		*next_block_remaining -= *block_remaining;

	} else {
		// The Block at *list_pos has been finished. Go to the next
		// entry in the list. If the end of the list has been reached,
		// reuse the size of the last Block. A zero entry is what
		// marks the end of the list here.
		if (opt_block_list[*list_pos + 1] != 0)
			++*list_pos;

		*block_remaining = opt_block_list[*list_pos];

		// If in single-threaded mode, split up the Block if needed.
		// This is not needed in multi-threaded mode because liblzma
		// will do this due to how threaded encoding works.
		if (!hardware_threads_is_mt() && opt_block_size > 0
				&& *block_remaining > opt_block_size) {
			*next_block_remaining
					= *block_remaining - opt_block_size;
			*block_remaining = opt_block_size;
		}
	}
}


/// Write the pending data in out_buf to the destination (nothing is
/// written when opt_mode is MODE_TEST) and make the whole output buffer
/// available to liblzma again.
///
/// Returns true if io_write() indicated an error; otherwise false.
static bool
coder_write_output(file_pair *pair)
{
	if (opt_mode != MODE_TEST) {
		if (io_write(pair, &out_buf, IO_BUFFER_SIZE - strm.avail_out))
			return true;
	}

	strm.next_out = out_buf.u8;
	strm.avail_out = IO_BUFFER_SIZE;
	return false;
}


/// Compress or decompress using liblzma.
///
/// Returns true if coding finished successfully. Returns false on
/// errors and also when the loop was ended by user_abort.
static bool
coder_normal(file_pair *pair)
{
	// Encoder needs to know when we have given all the input to it.
	// The decoders need to know it too when we are using
	// LZMA_CONCATENATED. We need to check for src_eof here, because
	// the first input chunk has been already read if decompressing,
	// and that may have been the only chunk we will read.
	lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN;

	lzma_ret ret;

	// Assume that something goes wrong.
	bool success = false;

	// block_remaining indicates how many input bytes to encode before
	// finishing the current .xz Block. The Block size is set with
	// --block-size=SIZE and --block-list. They have an effect only when
	// compressing to the .xz format. If block_remaining == UINT64_MAX,
	// only a single block is created.
	uint64_t block_remaining = UINT64_MAX;

	// next_block_remaining for when we are in single-threaded mode and
	// the Block in --block-list is larger than the --block-size=SIZE.
	uint64_t next_block_remaining = 0;

	// Position in opt_block_list. Unused if --block-list wasn't used.
	size_t list_pos = 0;

	// Handle --block-size for single-threaded mode and the first step
	// of --block-list.
	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) {
		// --block-size doesn't do anything here in threaded mode,
		// because the threaded encoder will take care of splitting
		// to fixed-sized Blocks.
		if (!hardware_threads_is_mt() && opt_block_size > 0)
			block_remaining = opt_block_size;

		// If --block-list was used, start with the first size.
		//
		// For threaded case, --block-size specifies how big Blocks
		// the encoder needs to be prepared to create at maximum
		// and --block-list will simultaneously cause new Blocks
		// to be started at specified intervals. To keep things
		// logical, the same is done in single-threaded mode. The
		// output is still not identical because in single-threaded
		// mode the size info isn't written into Block Headers.
		if (opt_block_list != NULL) {
			if (block_remaining < opt_block_list[list_pos]) {
				assert(!hardware_threads_is_mt());
				next_block_remaining =
						opt_block_list[list_pos]
						- block_remaining;
			} else {
				block_remaining = opt_block_list[list_pos];
			}
		}
	}

	strm.next_out = out_buf.u8;
	strm.avail_out = IO_BUFFER_SIZE;

	while (!user_abort) {
		// Fill the input buffer if it is empty and we aren't
		// flushing or finishing.
		if (strm.avail_in == 0 && action == LZMA_RUN) {
			strm.next_in = in_buf.u8;
			strm.avail_in = io_read(pair, &in_buf,
					my_min(block_remaining,
						IO_BUFFER_SIZE));

			// SIZE_MAX is the error indicator from io_read().
			if (strm.avail_in == SIZE_MAX)
				break;

			if (pair->src_eof) {
				action = LZMA_FINISH;

			} else if (block_remaining != UINT64_MAX) {
				// Start a new Block after every
				// opt_block_size bytes of input.
				block_remaining -= strm.avail_in;
				if (block_remaining == 0)
					action = LZMA_FULL_BARRIER;
			}

			if (action == LZMA_RUN && pair->flush_needed)
				action = LZMA_SYNC_FLUSH;
		}

		// Let liblzma do the actual work.
		ret = lzma_code(&strm, action);

		// Write out if the output buffer became full.
		if (strm.avail_out == 0) {
			if (coder_write_output(pair))
				break;
		}

		if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH
				|| action == LZMA_FULL_BARRIER)) {
			if (action == LZMA_SYNC_FLUSH) {
				// Flushing completed. Write the pending data
				// out immediately so that the reading side
				// can decompress everything compressed so far.
				if (coder_write_output(pair))
					break;

				// Mark that we haven't seen any new input
				// since the previous flush.
				pair->src_has_seen_input = false;
				pair->flush_needed = false;
			} else {
				// Start a new Block after LZMA_FULL_BARRIER.
				if (opt_block_list == NULL) {
					assert(!hardware_threads_is_mt());
					assert(opt_block_size > 0);
					block_remaining = opt_block_size;
				} else {
					split_block(&block_remaining,
							&next_block_remaining,
							&list_pos);
				}
			}

			// Start a new Block after LZMA_FULL_BARRIER or
			// continue the same block after LZMA_SYNC_FLUSH.
			action = LZMA_RUN;

		} else if (ret != LZMA_OK) {
			// Determine if the return value indicates that we
			// won't continue coding. LZMA_NO_CHECK would be
			// here too if LZMA_TELL_ANY_CHECK was used.
			const bool stop = ret != LZMA_UNSUPPORTED_CHECK;

			if (stop) {
				// Write the remaining bytes even if something
				// went wrong, because that way the user gets
				// as much data as possible, which can be good
				// when trying to get at least some useful
				// data out of damaged files.
				if (coder_write_output(pair))
					break;
			}

			if (ret == LZMA_STREAM_END) {
				if (allow_trailing_input) {
					io_fix_src_pos(pair, strm.avail_in);
					success = true;
					break;
				}

				// Check that there is no trailing garbage.
				// This is needed for LZMA_Alone and raw
				// streams. This is *not* done with .lz files
				// as that format specifically requires
				// allowing trailing garbage.
				if (strm.avail_in == 0 && !pair->src_eof) {
					// Try reading one more byte.
					// Hopefully we don't get any more
					// input, and thus pair->src_eof
					// becomes true.
					strm.avail_in = io_read(
							pair, &in_buf, 1);
					if (strm.avail_in == SIZE_MAX)
						break;

					assert(strm.avail_in == 0
							|| strm.avail_in == 1);
				}

				if (strm.avail_in == 0) {
					assert(pair->src_eof);
					success = true;
					break;
				}

				// We hadn't reached the end of the file.
				ret = LZMA_DATA_ERROR;
				assert(stop);
			}

			// If we get here and stop is true, something went
			// wrong and we print an error. Otherwise it's just
			// a warning and coding can continue.
			if (stop) {
-				message_error("%s: %s", pair->src_name,
+				message_error(_("%s: %s"), pair->src_name,
						message_strm(ret));
			} else {
-				message_warning("%s: %s", pair->src_name,
+				message_warning(_("%s: %s"), pair->src_name,
						message_strm(ret));

				// When compressing, all possible errors set
				// stop to true.
				assert(opt_mode != MODE_COMPRESS);
			}

			if (ret == LZMA_MEMLIMIT_ERROR) {
				// Display how much memory it would have
				// actually needed.
				message_mem_needed(V_ERROR,
						lzma_memusage(&strm));
			}

			if (stop)
				break;
		}

		// Show progress information under certain conditions.
		message_progress_update();
	}

	return success;
}


/// Copy from input file to output file without processing the data in any
/// way. This is used only when trying to decompress unrecognized files
/// with --decompress --stdout --force, so the output is always stdout.
///
/// The first chunk is expected to be in in_buf already, with its size
/// in strm.avail_in. Returns true when all input was copied, and false
/// on an I/O error or user_abort.
static bool
coder_passthru(file_pair *pair)
{
	while (strm.avail_in != 0) {
		if (user_abort)
			return false;

		if (io_write(pair, &in_buf, strm.avail_in))
			return false;

		// Nothing is transformed, so the output byte count
		// always equals the input byte count.
		strm.total_in += strm.avail_in;
		strm.total_out = strm.total_in;
		message_progress_update();

		strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
		if (strm.avail_in == SIZE_MAX)
			return false;
	}

	return true;
}


/// Process one input file: open it, initialize the coder, open the
/// destination (not with --test), run the coding or passthru loop, and
/// finally close the file pair, telling io_close() whether coding succeeded.
extern void
coder_run(const char *filename)
{
	// Set and possibly print the filename for the progress message.
	message_filename(filename);

	// Try to open the input file.
	file_pair *pair = io_open_src(filename);
	if (pair == NULL)
		return;

	// Assume that something goes wrong.
	bool success = false;

	if (opt_mode == MODE_COMPRESS) {
		strm.next_in = NULL;
		strm.avail_in = 0;
	} else {
		// Read the first chunk of input data. This is needed
		// to detect the input file type.
		strm.next_in = in_buf.u8;
		strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
	}

	if (strm.avail_in != SIZE_MAX) {
		// Initialize the coder. This will detect the file format
		// and, in decompression or testing mode, check the memory
		// usage of the first Block too. This way we don't try to
		// open the destination file if we see that coding wouldn't
		// work at all anyway. This also avoids deleting the old
		// "target" file if --force was used.
		const enum coder_init_ret init_ret = coder_init(pair);

		if (init_ret != CODER_INIT_ERROR && !user_abort) {
			// Don't open the destination file when --test
			// is used.
			if (opt_mode == MODE_TEST || !io_open_dest(pair)) {
				// Remember the current time. It is needed
				// for progress indicator.
				mytime_set_start_time();

				// Initialize the progress indicator.
				//
				// NOTE: When reading from stdin, fstat()
				// isn't called on it and thus src_st.st_size
				// is zero. If stdin pointed to a regular
				// file, it would still be possible to know
				// the file size but then we would also need
				// to take into account the current reading
				// position since with stdin it isn't
				// necessarily at the beginning of the file.
				const bool is_passthru = init_ret
						== CODER_INIT_PASSTHRU;
				const uint64_t in_size
					= pair->src_st.st_size <= 0
					? 0 : (uint64_t)(pair->src_st.st_size);
				message_progress_start(&strm,
						is_passthru, in_size);

				// Do the actual coding or passthru.
				if (is_passthru)
					success = coder_passthru(pair);
				else
					success = coder_normal(pair);

				message_progress_end(success);
			}
		}
	}

	// Close the file pair. It needs to know if coding was successful to
	// know if the source or target file should be unlinked.
	io_close(pair, success);

	return;
}


#ifndef NDEBUG
/// Release the liblzma stream state with lzma_end(). This function exists
/// only in builds where NDEBUG isn't defined.
extern void
coder_free(void)
{
	lzma_end(&strm);
	return;
}
#endif
diff --git a/contrib/xz/src/xz/file_io.c b/contrib/xz/src/xz/file_io.c
index 29f46ea7fe93..a181b531258f 100644
--- a/contrib/xz/src/xz/file_io.c
+++ b/contrib/xz/src/xz/file_io.c
@@ -1,1383 +1,1383 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file       file_io.c
/// \brief      File opening, unlinking, and closing
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
// /////////////////////////////////////////////////////////////////////////////// #include "private.h" #include #ifdef TUKLIB_DOSLIKE # include #else # include static bool warn_fchown; #endif #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) # include #elif defined(HAVE__FUTIME) # include #elif defined(HAVE_UTIME) # include #endif #ifdef HAVE_CAPSICUM # ifdef HAVE_SYS_CAPSICUM_H # include # else # include # endif #endif #include "tuklib_open_stdxxx.h" #ifndef O_BINARY # define O_BINARY 0 #endif #ifndef O_NOCTTY # define O_NOCTTY 0 #endif // Using this macro to silence a warning from gcc -Wlogical-op. #if EAGAIN == EWOULDBLOCK # define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN) #else # define IS_EAGAIN_OR_EWOULDBLOCK(e) \ ((e) == EAGAIN || (e) == EWOULDBLOCK) #endif typedef enum { IO_WAIT_MORE, // Reading or writing is possible. IO_WAIT_ERROR, // Error or user_abort IO_WAIT_TIMEOUT, // poll() timed out } io_wait_ret; /// If true, try to create sparse files when decompressing. static bool try_sparse = true; #ifdef ENABLE_SANDBOX /// True if the conditions for sandboxing (described in main()) have been met. static bool sandbox_allowed = false; #endif #ifndef TUKLIB_DOSLIKE /// File status flags of standard input. This is used by io_open_src() /// and io_close_src(). static int stdin_flags; static bool restore_stdin_flags = false; /// Original file status flags of standard output. This is used by /// io_open_dest() and io_close_dest() to save and restore the flags. static int stdout_flags; static bool restore_stdout_flags = false; /// Self-pipe used together with the user_abort variable to avoid /// race conditions with signal handling. static int user_abort_pipe[2]; #endif static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); extern void io_init(void) { // Make sure that stdin, stdout, and stderr are connected to // a valid file descriptor. 
Exit immediately with exit code ERROR // if we cannot make the file descriptors valid. Maybe we should // print an error message, but our stderr could be screwed anyway. tuklib_open_stdxxx(E_ERROR); #ifndef TUKLIB_DOSLIKE // If fchown() fails setting the owner, we warn about it only if // we are root. warn_fchown = geteuid() == 0; // Create a pipe for the self-pipe trick. if (pipe(user_abort_pipe)) message_fatal(_("Error creating a pipe: %s"), strerror(errno)); // Make both ends of the pipe non-blocking. for (unsigned i = 0; i < 2; ++i) { int flags = fcntl(user_abort_pipe[i], F_GETFL); if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL, flags | O_NONBLOCK) == -1) message_fatal(_("Error creating a pipe: %s"), strerror(errno)); } #endif #ifdef __DJGPP__ // Avoid doing useless things when statting files. // This isn't important but doesn't hurt. _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; #endif return; } #ifndef TUKLIB_DOSLIKE extern void io_write_to_user_abort_pipe(void) { // If the write() fails, it's probably due to the pipe being full. // Failing in that case is fine. If the reason is something else, // there's not much we can do since this is called in a signal // handler. So ignore the errors and try to avoid warnings with // GCC and glibc when _FORTIFY_SOURCE=2 is used. uint8_t b = '\0'; const ssize_t ret = write(user_abort_pipe[1], &b, 1); (void)ret; return; } #endif extern void io_no_sparse(void) { try_sparse = false; return; } #ifdef ENABLE_SANDBOX extern void io_allow_sandbox(void) { sandbox_allowed = true; return; } /// Enables operating-system-specific sandbox if it is possible. /// src_fd is the file descriptor of the input file. static void io_sandbox_enter(int src_fd) { if (!sandbox_allowed) { // This message is more often annoying than useful so // it's commented out. It can be useful when developing // the sandboxing code. 
//message(V_DEBUG, _("Sandbox is disabled due " // "to incompatible command line arguments")); return; } const char dummy_str[] = "x"; // Try to ensure that both libc and xz locale files have been // loaded when NLS is enabled. snprintf(NULL, 0, "%s%s", _(dummy_str), strerror(EINVAL)); // Try to ensure that iconv data files needed for handling multibyte // characters have been loaded. This is needed at least with glibc. tuklib_mbstr_width(dummy_str, NULL); #ifdef HAVE_CAPSICUM // Capsicum needs FreeBSD 10.0 or later. cap_rights_t rights; if (cap_enter()) goto error; if (cap_rights_limit(src_fd, cap_rights_init(&rights, CAP_EVENT, CAP_FCNTL, CAP_LOOKUP, CAP_READ, CAP_SEEK))) goto error; if (src_fd != STDIN_FILENO && cap_rights_limit( STDIN_FILENO, cap_rights_clear(&rights))) goto error; if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights, CAP_EVENT, CAP_FCNTL, CAP_FSTAT, CAP_LOOKUP, CAP_WRITE, CAP_SEEK))) goto error; if (cap_rights_limit(STDERR_FILENO, cap_rights_init(&rights, CAP_WRITE))) goto error; if (cap_rights_limit(user_abort_pipe[0], cap_rights_init(&rights, CAP_EVENT))) goto error; if (cap_rights_limit(user_abort_pipe[1], cap_rights_init(&rights, CAP_WRITE))) goto error; #elif defined(HAVE_PLEDGE) // pledge() was introduced in OpenBSD 5.9. // // main() unconditionally calls pledge() with fairly relaxed // promises which work in all situations. Here we make the // sandbox more strict. if (pledge("stdio", "")) goto error; (void)src_fd; #else # error ENABLE_SANDBOX is defined but no sandboxing method was found. #endif // This message is annoying in xz -lvv. //message(V_DEBUG, _("Sandbox was successfully enabled")); return; error: #ifdef HAVE_CAPSICUM // If a kernel is configured without capability mode support or // used in an emulator that does not implement the capability // system calls, then the Capsicum system calls will fail and set // errno to ENOSYS. In that case xz will silently run without // the sandbox. 
if (errno == ENOSYS) return; #endif message_fatal(_("Failed to enable the sandbox")); } #endif // ENABLE_SANDBOX #ifndef TUKLIB_DOSLIKE /// \brief Waits for input or output to become available or for a signal /// /// This uses the self-pipe trick to avoid a race condition that can occur /// if a signal is caught after user_abort has been checked but before e.g. /// read() has been called. In that situation read() could block unless /// non-blocking I/O is used. With non-blocking I/O something like select() /// or poll() is needed to avoid a busy-wait loop, and the same race condition /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in /// POSIX) but neither is portable enough in 2013. The self-pipe trick is /// old and very portable. static io_wait_ret io_wait(file_pair *pair, int timeout, bool is_reading) { struct pollfd pfd[2]; if (is_reading) { pfd[0].fd = pair->src_fd; pfd[0].events = POLLIN; } else { pfd[0].fd = pair->dest_fd; pfd[0].events = POLLOUT; } pfd[1].fd = user_abort_pipe[0]; pfd[1].events = POLLIN; while (true) { const int ret = poll(pfd, 2, timeout); if (user_abort) return IO_WAIT_ERROR; if (ret == -1) { if (errno == EINTR || errno == EAGAIN) continue; message_error(_("%s: poll() failed: %s"), is_reading ? pair->src_name : pair->dest_name, strerror(errno)); return IO_WAIT_ERROR; } if (ret == 0) return IO_WAIT_TIMEOUT; if (pfd[0].revents != 0) return IO_WAIT_MORE; } } #endif /// \brief Unlink a file /// /// This tries to verify that the file being unlinked really is the file that /// we want to unlink by verifying device and inode numbers. There's still /// a small unavoidable race, but this is much better than nothing (the file /// could have been moved/replaced even hours earlier). static void io_unlink(const char *name, const struct stat *known_st) { #if defined(TUKLIB_DOSLIKE) // On DOS-like systems, st_ino is meaningless, so don't bother // testing it. Just silence a compiler warning. 
(void)known_st; #else struct stat new_st; // If --force was used, use stat() instead of lstat(). This way // (de)compressing symlinks works correctly. However, it also means // that xz cannot detect if a regular file foo is renamed to bar // and then a symlink foo -> bar is created. Because of stat() // instead of lstat(), xz will think that foo hasn't been replaced // with another file. Thus, xz will remove foo even though it no // longer is the same file that xz used when it started compressing. // Probably it's not too bad though, so this doesn't need a more // complex fix. const int stat_ret = opt_force ? stat(name, &new_st) : lstat(name, &new_st); if (stat_ret # ifdef __VMS // st_ino is an array, and we don't want to // compare st_dev at all. || memcmp(&new_st.st_ino, &known_st->st_ino, sizeof(new_st.st_ino)) != 0 # else // Typical POSIX-like system || new_st.st_dev != known_st->st_dev || new_st.st_ino != known_st->st_ino # endif ) // TRANSLATORS: When compression or decompression finishes, // and xz is going to remove the source file, xz first checks // if the source file still exists, and if it does, does its // device and inode numbers match what xz saw when it opened // the source file. If these checks fail, this message is // shown, %s being the filename, and the file is not deleted. // The check for device and inode numbers is there, because // it is possible that the user has put a new file in place // of the original file, and in that case it obviously // shouldn't be removed. message_warning(_("%s: File seems to have been moved, " "not removing"), name); else #endif // There's a race condition between lstat() and unlink() // but at least we have tried to avoid removing wrong file. if (unlink(name)) message_warning(_("%s: Cannot remove: %s"), name, strerror(errno)); return; } /// \brief Copies owner/group and permissions /// /// \todo ACL and EA support /// static void io_copy_attrs(const file_pair *pair) { // Skip chown and chmod on Windows. 
#ifndef TUKLIB_DOSLIKE // This function is more tricky than you may think at first. // Blindly copying permissions may permit users to access the // destination file who didn't have permission to access the // source file. // Try changing the owner of the file. If we aren't root or the owner // isn't already us, fchown() probably doesn't succeed. We warn // about failing fchown() only if we are root. if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1)) && warn_fchown) message_warning(_("%s: Cannot set the file owner: %s"), pair->dest_name, strerror(errno)); mode_t mode; // With BSD semantics the new dest file may have a group that // does not belong to the user. If the src file has the same gid // nothing has to be done. Nevertheless OpenBSD fchown(2) fails // in this case which seems to be POSIX compliant. As there is // nothing to do, skip the system call. if (pair->dest_st.st_gid != pair->src_st.st_gid && fchown(pair->dest_fd, (uid_t)(-1), pair->src_st.st_gid)) { message_warning(_("%s: Cannot set the file group: %s"), pair->dest_name, strerror(errno)); // We can still safely copy some additional permissions: // `group' must be at least as strict as `other' and // also vice versa. // // NOTE: After this, the owner of the source file may // get additional permissions. This shouldn't be too bad, // because the owner would have had permission to chmod // the original file anyway. mode = ((pair->src_st.st_mode & 0070) >> 3) & (pair->src_st.st_mode & 0007); mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; } else { // Drop the setuid, setgid, and sticky bits. mode = pair->src_st.st_mode & 0777; } if (fchmod(pair->dest_fd, mode)) message_warning(_("%s: Cannot set the file permissions: %s"), pair->dest_name, strerror(errno)); #endif // Copy the timestamps. We have several possible ways to do this, of // which some are better in both security and precision. // // First, get the nanosecond part of the timestamps. 
As of writing, // it's not standardized by POSIX, and there are several names for // the same thing in struct stat. long atime_nsec; long mtime_nsec; # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) // GNU and Solaris atime_nsec = pair->src_st.st_atim.tv_nsec; mtime_nsec = pair->src_st.st_mtim.tv_nsec; # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) // BSD atime_nsec = pair->src_st.st_atimespec.tv_nsec; mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) // GNU and BSD without extensions atime_nsec = pair->src_st.st_atimensec; mtime_nsec = pair->src_st.st_mtimensec; # elif defined(HAVE_STRUCT_STAT_ST_UATIME) // Tru64 atime_nsec = pair->src_st.st_uatime * 1000; mtime_nsec = pair->src_st.st_umtime * 1000; # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) // UnixWare atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; # else // Safe fallback atime_nsec = 0; mtime_nsec = 0; # endif // Construct a structure to hold the timestamps and call appropriate // function to set the timestamps. #if defined(HAVE_FUTIMENS) // Use nanosecond precision. struct timespec tv[2]; tv[0].tv_sec = pair->src_st.st_atime; tv[0].tv_nsec = atime_nsec; tv[1].tv_sec = pair->src_st.st_mtime; tv[1].tv_nsec = mtime_nsec; (void)futimens(pair->dest_fd, tv); #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) // Use microsecond precision. struct timeval tv[2]; tv[0].tv_sec = pair->src_st.st_atime; tv[0].tv_usec = atime_nsec / 1000; tv[1].tv_sec = pair->src_st.st_mtime; tv[1].tv_usec = mtime_nsec / 1000; # if defined(HAVE_FUTIMES) (void)futimes(pair->dest_fd, tv); # elif defined(HAVE_FUTIMESAT) (void)futimesat(pair->dest_fd, NULL, tv); # else // Argh, no function to use a file descriptor to set the timestamp. (void)utimes(pair->dest_name, tv); # endif #elif defined(HAVE__FUTIME) // Use one-second precision with Windows-specific _futime(). 
// We could use utime() too except that for some reason the // timestamp will get reset at close(). With _futime() it works. // This struct cannot be const as _futime() takes a non-const pointer. struct _utimbuf buf = { .actime = pair->src_st.st_atime, .modtime = pair->src_st.st_mtime, }; // Avoid warnings. (void)atime_nsec; (void)mtime_nsec; (void)_futime(pair->dest_fd, &buf); #elif defined(HAVE_UTIME) // Use one-second precision. utime() doesn't support using file // descriptor either. Some systems have broken utime() prototype // so don't make this const. struct utimbuf buf = { .actime = pair->src_st.st_atime, .modtime = pair->src_st.st_mtime, }; // Avoid warnings. (void)atime_nsec; (void)mtime_nsec; (void)utime(pair->dest_name, &buf); #endif return; } /// Opens the source file. Returns false on success, true on error. static bool io_open_src_real(file_pair *pair) { // There's nothing to open when reading from stdin. if (pair->src_name == stdin_filename) { pair->src_fd = STDIN_FILENO; #ifdef TUKLIB_DOSLIKE setmode(STDIN_FILENO, O_BINARY); #else // Try to set stdin to non-blocking mode. It won't work // e.g. on OpenBSD if stdout is e.g. /dev/null. In such // case we proceed as if stdin were non-blocking anyway // (in case of /dev/null it will be in practice). The // same applies to stdout in io_open_dest_real(). stdin_flags = fcntl(STDIN_FILENO, F_GETFL); if (stdin_flags == -1) { message_error(_("Error getting the file status flags " "from standard input: %s"), strerror(errno)); return true; } if ((stdin_flags & O_NONBLOCK) == 0 && fcntl(STDIN_FILENO, F_SETFL, stdin_flags | O_NONBLOCK) != -1) restore_stdin_flags = true; #endif #ifdef HAVE_POSIX_FADVISE // It will fail if stdin is a pipe and that's fine. (void)posix_fadvise(STDIN_FILENO, 0, 0, opt_mode == MODE_LIST ? POSIX_FADV_RANDOM : POSIX_FADV_SEQUENTIAL); #endif return false; } // Symlinks are not followed unless writing to stdout or --force // or --keep was used. 
const bool follow_symlinks = opt_stdout || opt_force || opt_keep_original; // We accept only regular files if we are writing the output // to disk too. bzip2 allows overriding this with --force but // gzip and xz don't. const bool reg_files_only = !opt_stdout; // Flags for open() int flags = O_RDONLY | O_BINARY | O_NOCTTY; #ifndef TUKLIB_DOSLIKE // Use non-blocking I/O: // - It prevents blocking when opening FIFOs and some other // special files, which is good if we want to accept only // regular files. // - It can help avoiding some race conditions with signal handling. flags |= O_NONBLOCK; #endif #if defined(O_NOFOLLOW) if (!follow_symlinks) flags |= O_NOFOLLOW; #elif !defined(TUKLIB_DOSLIKE) // Some POSIX-like systems lack O_NOFOLLOW (it's not required // by POSIX). Check for symlinks with a separate lstat() on // these systems. if (!follow_symlinks) { struct stat st; if (lstat(pair->src_name, &st)) { - message_error("%s: %s", pair->src_name, + message_error(_("%s: %s"), pair->src_name, strerror(errno)); return true; } else if (S_ISLNK(st.st_mode)) { message_warning(_("%s: Is a symbolic link, " "skipping"), pair->src_name); return true; } } #else // Avoid warnings. (void)follow_symlinks; #endif // Try to open the file. Signals have been blocked so EINTR shouldn't // be possible. pair->src_fd = open(pair->src_name, flags); if (pair->src_fd == -1) { // Signals (that have a signal handler) have been blocked. assert(errno != EINTR); #ifdef O_NOFOLLOW // Give an understandable error message if the reason // for failing was that the file was a symbolic link. // // Note that at least Linux, OpenBSD, Solaris, and Darwin // use ELOOP to indicate that O_NOFOLLOW was the reason // that open() failed. Because there may be // directories in the pathname, ELOOP may occur also // because of a symlink loop in the directory part. // So ELOOP doesn't tell us what actually went wrong, // and this stupidity went into POSIX-1.2008 too. 
// // FreeBSD associates EMLINK with O_NOFOLLOW and // Tru64 uses ENOTSUP. We use these directly here // and skip the lstat() call and the associated race. // I want to hear if there are other kernels that // fail with something else than ELOOP with O_NOFOLLOW. bool was_symlink = false; # if defined(__FreeBSD__) || defined(__DragonFly__) if (errno == EMLINK) was_symlink = true; # elif defined(__digital__) && defined(__unix__) if (errno == ENOTSUP) was_symlink = true; # elif defined(__NetBSD__) if (errno == EFTYPE) was_symlink = true; # else if (errno == ELOOP && !follow_symlinks) { const int saved_errno = errno; struct stat st; if (lstat(pair->src_name, &st) == 0 && S_ISLNK(st.st_mode)) was_symlink = true; errno = saved_errno; } # endif if (was_symlink) message_warning(_("%s: Is a symbolic link, " "skipping"), pair->src_name); else #endif // Something else than O_NOFOLLOW failing // (assuming that the race conditions didn't // confuse us). - message_error("%s: %s", pair->src_name, + message_error(_("%s: %s"), pair->src_name, strerror(errno)); return true; } // Stat the source file. We need the result also when we copy // the permissions, and when unlinking. // // NOTE: Use stat() instead of fstat() with DJGPP, because // then we have a better chance to get st_ino value that can // be used in io_open_dest_real() to prevent overwriting the // source file. #ifdef __DJGPP__ if (stat(pair->src_name, &pair->src_st)) goto error_msg; #else if (fstat(pair->src_fd, &pair->src_st)) goto error_msg; #endif if (S_ISDIR(pair->src_st.st_mode)) { message_warning(_("%s: Is a directory, skipping"), pair->src_name); goto error; } if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) { message_warning(_("%s: Not a regular file, skipping"), pair->src_name); goto error; } #ifndef TUKLIB_DOSLIKE if (reg_files_only && !opt_force && !opt_keep_original) { if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { // gzip rejects setuid and setgid files even // when --force was used. 
bzip2 doesn't check // for them, but calls fchown() after fchmod(), // and many systems automatically drop setuid // and setgid bits there. // // We accept setuid and setgid files if // --force or --keep was used. We drop these bits // explicitly in io_copy_attr(). message_warning(_("%s: File has setuid or " "setgid bit set, skipping"), pair->src_name); goto error; } if (pair->src_st.st_mode & S_ISVTX) { message_warning(_("%s: File has sticky bit " "set, skipping"), pair->src_name); goto error; } if (pair->src_st.st_nlink > 1) { message_warning(_("%s: Input file has more " "than one hard link, " "skipping"), pair->src_name); goto error; } } // If it is something else than a regular file, wait until // there is input available. This way reading from FIFOs // will work when open() is used with O_NONBLOCK. if (!S_ISREG(pair->src_st.st_mode)) { signals_unblock(); const io_wait_ret ret = io_wait(pair, -1, true); signals_block(); if (ret != IO_WAIT_MORE) goto error; } #endif #ifdef HAVE_POSIX_FADVISE // It will fail with some special files like FIFOs but that is fine. (void)posix_fadvise(pair->src_fd, 0, 0, opt_mode == MODE_LIST ? POSIX_FADV_RANDOM : POSIX_FADV_SEQUENTIAL); #endif return false; error_msg: - message_error("%s: %s", pair->src_name, strerror(errno)); + message_error(_("%s: %s"), pair->src_name, strerror(errno)); error: (void)close(pair->src_fd); return true; } extern file_pair * io_open_src(const char *src_name) { if (src_name[0] == '\0') { message_error(_("Empty filename, skipping")); return NULL; } // Since we have only one file open at a time, we can use // a statically allocated structure. static file_pair pair; // This implicitly also initializes src_st.st_size to zero // which is expected to be <= 0 by default. fstat() isn't // called when reading from standard input but src_st.st_size // is still read. 
pair = (file_pair){ .src_name = src_name, .dest_name = NULL, .src_fd = -1, .dest_fd = -1, .src_eof = false, .src_has_seen_input = false, .flush_needed = false, .dest_try_sparse = false, .dest_pending_sparse = 0, }; // Block the signals, for which we have a custom signal handler, so // that we don't need to worry about EINTR. signals_block(); const bool error = io_open_src_real(&pair); signals_unblock(); #ifdef ENABLE_SANDBOX if (!error) io_sandbox_enter(pair.src_fd); #endif return error ? NULL : &pair; } /// \brief Closes source file of the file_pair structure /// /// \param pair File whose src_fd should be closed /// \param success If true, the file will be removed from the disk if /// closing succeeds and --keep hasn't been used. static void io_close_src(file_pair *pair, bool success) { #ifndef TUKLIB_DOSLIKE if (restore_stdin_flags) { assert(pair->src_fd == STDIN_FILENO); restore_stdin_flags = false; if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1) message_error(_("Error restoring the status flags " "to standard input: %s"), strerror(errno)); } #endif if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { // Close the file before possibly unlinking it. On DOS-like // systems this is always required since unlinking will fail // if the file is open. On POSIX systems it usually works // to unlink open files, but in some cases it doesn't and // one gets EBUSY in errno. // // xz 5.2.2 and older unlinked the file before closing it // (except on DOS-like systems). The old code didn't handle // EBUSY and could fail e.g. on some CIFS shares. The // advantage of unlinking before closing is negligible // (avoids a race between close() and stat()/lstat() and // unlink()), so let's keep this simple. (void)close(pair->src_fd); if (success && !opt_keep_original) io_unlink(pair->src_name, &pair->src_st); } return; } static bool io_open_dest_real(file_pair *pair) { if (opt_stdout || pair->src_fd == STDIN_FILENO) { // We don't modify or free() this. 
pair->dest_name = (char *)"(stdout)"; pair->dest_fd = STDOUT_FILENO; #ifdef TUKLIB_DOSLIKE setmode(STDOUT_FILENO, O_BINARY); #else // Try to set O_NONBLOCK if it isn't already set. // If it fails, we assume that stdout is non-blocking // in practice. See the comments in io_open_src_real() // for similar situation with stdin. // // NOTE: O_APPEND may be unset later in this function // and it relies on stdout_flags being set here. stdout_flags = fcntl(STDOUT_FILENO, F_GETFL); if (stdout_flags == -1) { message_error(_("Error getting the file status flags " "from standard output: %s"), strerror(errno)); return true; } if ((stdout_flags & O_NONBLOCK) == 0 && fcntl(STDOUT_FILENO, F_SETFL, stdout_flags | O_NONBLOCK) != -1) restore_stdout_flags = true; #endif } else { pair->dest_name = suffix_get_dest_name(pair->src_name); if (pair->dest_name == NULL) return true; #ifdef __DJGPP__ struct stat st; if (stat(pair->dest_name, &st) == 0) { // Check that it isn't a special file like "prn". if (st.st_dev == -1) { message_error("%s: Refusing to write to " "a DOS special file", pair->dest_name); free(pair->dest_name); return true; } // Check that we aren't overwriting the source file. if (st.st_dev == pair->src_st.st_dev && st.st_ino == pair->src_st.st_ino) { message_error("%s: Output file is the same " "as the input file", pair->dest_name); free(pair->dest_name); return true; } } #endif // If --force was used, unlink the target file first. if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { message_error(_("%s: Cannot remove: %s"), pair->dest_name, strerror(errno)); free(pair->dest_name); return true; } // Open the file. 
int flags = O_WRONLY | O_BINARY | O_NOCTTY | O_CREAT | O_EXCL; #ifndef TUKLIB_DOSLIKE flags |= O_NONBLOCK; #endif const mode_t mode = S_IRUSR | S_IWUSR; pair->dest_fd = open(pair->dest_name, flags, mode); if (pair->dest_fd == -1) { - message_error("%s: %s", pair->dest_name, + message_error(_("%s: %s"), pair->dest_name, strerror(errno)); free(pair->dest_name); return true; } } #ifndef TUKLIB_DOSLIKE // dest_st isn't used on DOS-like systems except as a dummy // argument to io_unlink(), so don't fstat() on such systems. if (fstat(pair->dest_fd, &pair->dest_st)) { // If fstat() really fails, we have a safe fallback here. # if defined(__VMS) pair->dest_st.st_ino[0] = 0; pair->dest_st.st_ino[1] = 0; pair->dest_st.st_ino[2] = 0; # else pair->dest_st.st_dev = 0; pair->dest_st.st_ino = 0; # endif } else if (try_sparse && opt_mode == MODE_DECOMPRESS) { // When writing to standard output, we need to be extra // careful: // - It may be connected to something else than // a regular file. // - We aren't necessarily writing to a new empty file // or to the end of an existing file. // - O_APPEND may be active. // // TODO: I'm keeping this disabled for DOS-like systems // for now. FAT doesn't support sparse files, but NTFS // does, so maybe this should be enabled on Windows after // some testing. if (pair->dest_fd == STDOUT_FILENO) { if (!S_ISREG(pair->dest_st.st_mode)) return false; if (stdout_flags & O_APPEND) { // Creating a sparse file is not possible // when O_APPEND is active (it's used by // shell's >> redirection). As I understand // it, it is safe to temporarily disable // O_APPEND in xz, because if someone // happened to write to the same file at the // same time, results would be bad anyway // (users shouldn't assume that xz uses any // specific block size when writing data). // // The write position may be something else // than the end of the file, so we must fix // it to start writing at the end of the file // to imitate O_APPEND. 
if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) return false; // Construct the new file status flags. // If O_NONBLOCK was set earlier in this // function, it must be kept here too. int flags = stdout_flags & ~O_APPEND; if (restore_stdout_flags) flags |= O_NONBLOCK; // If this fcntl() fails, we continue but won't // try to create sparse output. The original // flags will still be restored if needed (to // unset O_NONBLOCK) when the file is finished. if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1) return false; // Disabling O_APPEND succeeded. Mark // that the flags should be restored // in io_close_dest(). (This may have already // been set when enabling O_NONBLOCK.) restore_stdout_flags = true; } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR) != pair->dest_st.st_size) { // Writing won't start exactly at the end // of the file. We cannot use sparse output, // because it would probably corrupt the file. return false; } } pair->dest_try_sparse = true; } #endif return false; } extern bool io_open_dest(file_pair *pair) { signals_block(); const bool ret = io_open_dest_real(pair); signals_unblock(); return ret; } /// \brief Closes destination file of the file_pair structure /// /// \param pair File whose dest_fd should be closed /// \param success If false, the file will be removed from the disk. /// /// \return Zero if closing succeeds. On error, -1 is returned and /// error message printed. static bool io_close_dest(file_pair *pair, bool success) { #ifndef TUKLIB_DOSLIKE // If io_open_dest() has disabled O_APPEND, restore it here. 
if (restore_stdout_flags) { assert(pair->dest_fd == STDOUT_FILENO); restore_stdout_flags = false; if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) { message_error(_("Error restoring the O_APPEND flag " "to standard output: %s"), strerror(errno)); return true; } } #endif if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) return false; if (close(pair->dest_fd)) { message_error(_("%s: Closing the file failed: %s"), pair->dest_name, strerror(errno)); // Closing destination file failed, so we cannot trust its // contents. Get rid of junk: io_unlink(pair->dest_name, &pair->dest_st); free(pair->dest_name); return true; } // If the operation using this file wasn't successful, we git rid // of the junk file. if (!success) io_unlink(pair->dest_name, &pair->dest_st); free(pair->dest_name); return false; } extern void io_close(file_pair *pair, bool success) { // Take care of sparseness at the end of the output file. if (success && pair->dest_try_sparse && pair->dest_pending_sparse > 0) { // Seek forward one byte less than the size of the pending // hole, then write one zero-byte. This way the file grows // to its correct size. An alternative would be to use // ftruncate() but that isn't portable enough (e.g. it // doesn't work with FAT on Linux; FAT isn't that important // since it doesn't support sparse files anyway, but we don't // want to create corrupt files on it). if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1, SEEK_CUR) == -1) { message_error(_("%s: Seeking failed when trying " "to create a sparse file: %s"), pair->dest_name, strerror(errno)); success = false; } else { const uint8_t zero[1] = { '\0' }; if (io_write_buf(pair, zero, 1)) success = false; } } signals_block(); // Copy the file attributes. We need to skip this if destination // file isn't open or it is standard output. if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO) io_copy_attrs(pair); // Close the destination first. 
If it fails, we must not remove // the source file! if (io_close_dest(pair, success)) success = false; // Close the source file, and unlink it if the operation using this // file pair was successful and we haven't requested to keep the // source file. io_close_src(pair, success); signals_unblock(); return; } extern void io_fix_src_pos(file_pair *pair, size_t rewind_size) { assert(rewind_size <= IO_BUFFER_SIZE); if (rewind_size > 0) { // This doesn't need to work on unseekable file descriptors, // so just ignore possible errors. (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR); } return; } extern size_t io_read(file_pair *pair, io_buf *buf, size_t size) { // We use small buffers here. assert(size < SSIZE_MAX); size_t pos = 0; while (pos < size) { const ssize_t amount = read( pair->src_fd, buf->u8 + pos, size - pos); if (amount == 0) { pair->src_eof = true; break; } if (amount == -1) { if (errno == EINTR) { if (user_abort) return SIZE_MAX; continue; } #ifndef TUKLIB_DOSLIKE if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) { // Disable the flush-timeout if no input has // been seen since the previous flush and thus // there would be nothing to flush after the // timeout expires (avoids busy waiting). const int timeout = pair->src_has_seen_input ? mytime_get_flush_timeout() : -1; switch (io_wait(pair, timeout, true)) { case IO_WAIT_MORE: continue; case IO_WAIT_ERROR: return SIZE_MAX; case IO_WAIT_TIMEOUT: pair->flush_needed = true; return pos; default: message_bug(); } } #endif message_error(_("%s: Read error: %s"), pair->src_name, strerror(errno)); return SIZE_MAX; } pos += (size_t)(amount); if (!pair->src_has_seen_input) { pair->src_has_seen_input = true; mytime_set_flush_time(); } } return pos; } extern bool io_seek_src(file_pair *pair, uint64_t pos) { // Caller must not attempt to seek past the end of the input file // (seeking to 100 in a 100-byte file is seeking to the end of // the file, not past the end of the file, and thus that is allowed). 
// // This also validates that pos can be safely cast to off_t. if (pos > (uint64_t)(pair->src_st.st_size)) message_bug(); if (lseek(pair->src_fd, (off_t)(pos), SEEK_SET) == -1) { message_error(_("%s: Error seeking the file: %s"), pair->src_name, strerror(errno)); return true; } pair->src_eof = false; return false; } extern bool io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos) { // Using lseek() and read() is more portable than pread() and // for us it is as good as real pread(). if (io_seek_src(pair, pos)) return true; const size_t amount = io_read(pair, buf, size); if (amount == SIZE_MAX) return true; if (amount != size) { message_error(_("%s: Unexpected end of file"), pair->src_name); return true; } return false; } static bool is_sparse(const io_buf *buf) { assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0); for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i) if (buf->u64[i] != 0) return false; return true; } static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size) { assert(size < SSIZE_MAX); while (size > 0) { const ssize_t amount = write(pair->dest_fd, buf, size); if (amount == -1) { if (errno == EINTR) { if (user_abort) return true; continue; } #ifndef TUKLIB_DOSLIKE if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) { if (io_wait(pair, -1, false) == IO_WAIT_MORE) continue; return true; } #endif // Handle broken pipe specially. gzip and bzip2 // don't print anything on SIGPIPE. In addition, // gzip --quiet uses exit status 2 (warning) on // broken pipe instead of whatever raise(SIGPIPE) // would make it return. It is there to hide "Broken // pipe" message on some old shells (probably old // GNU bash). // // We don't do anything special with --quiet, which // is what bzip2 does too. If we get SIGPIPE, we // will handle it like other signals by setting // user_abort, and get EPIPE here. 
if (errno != EPIPE) message_error(_("%s: Write error: %s"), pair->dest_name, strerror(errno)); return true; } buf += (size_t)(amount); size -= (size_t)(amount); } return false; } extern bool io_write(file_pair *pair, const io_buf *buf, size_t size) { assert(size <= IO_BUFFER_SIZE); if (pair->dest_try_sparse) { // Check if the block is sparse (contains only zeros). If it // sparse, we just store the amount and return. We will take // care of actually skipping over the hole when we hit the // next data block or close the file. // // Since io_close() requires that dest_pending_sparse > 0 // if the file ends with sparse block, we must also return // if size == 0 to avoid doing the lseek(). if (size == IO_BUFFER_SIZE) { // Even if the block was sparse, treat it as non-sparse // if the pending sparse amount is large compared to // the size of off_t. In practice this only matters // on 32-bit systems where off_t isn't always 64 bits. const off_t pending_max = (off_t)(1) << (sizeof(off_t) * CHAR_BIT - 2); if (is_sparse(buf) && pair->dest_pending_sparse < pending_max) { pair->dest_pending_sparse += (off_t)(size); return false; } } else if (size == 0) { return false; } // This is not a sparse block. If we have a pending hole, // skip it now. 
if (pair->dest_pending_sparse > 0) { if (lseek(pair->dest_fd, pair->dest_pending_sparse, SEEK_CUR) == -1) { message_error(_("%s: Seeking failed when " "trying to create a sparse " "file: %s"), pair->dest_name, strerror(errno)); return true; } pair->dest_pending_sparse = 0; } } return io_write_buf(pair, buf->u8, size); } diff --git a/contrib/xz/src/xz/hardware.c b/contrib/xz/src/xz/hardware.c index ccdc3b9eb5c9..c6948821862a 100644 --- a/contrib/xz/src/xz/hardware.c +++ b/contrib/xz/src/xz/hardware.c @@ -1,338 +1,338 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file hardware.c /// \brief Detection of available hardware resources // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "private.h" /// Maximum number of worker threads. This can be set with /// the --threads=NUM command line option. static uint32_t threads_max = 1; /// True when the number of threads is automatically determined based /// on the available hardware threads. static bool threads_are_automatic = false; /// If true, then try to use multi-threaded mode (if memlimit allows) /// even if only one thread was requested explicitly (-T+1). static bool use_mt_mode_with_one_thread = false; /// Memory usage limit for compression static uint64_t memlimit_compress = 0; /// Memory usage limit for decompression static uint64_t memlimit_decompress = 0; /// Default memory usage for multithreaded modes: /// /// - Default value for --memlimit-compress when automatic number of threads /// is used. However, if the limit wouldn't allow even one thread then /// the limit is ignored in coder.c and one thread will be used anyway. 
/// This mess is a compromise: we wish to prevent -T0 from using too /// many threads but we also don't want xz to give an error due to /// a memlimit that the user didn't explicitly set. /// /// - Default value for --memlimit-mt-decompress /// -/// This value is caluclated in hardware_init() and cannot be changed later. +/// This value is calculated in hardware_init() and cannot be changed later. static uint64_t memlimit_mt_default; /// Memory usage limit for multithreaded decompression. This is a soft limit: /// if reducing the number of threads to one isn't enough to keep memory /// usage below this limit, then one thread is used and this limit is ignored. /// memlimit_decompress is still obeyed. /// /// This can be set with --memlimit-mt-decompress. The default value for /// this is memlimit_mt_default. static uint64_t memlimit_mtdec; /// Total amount of physical RAM static uint64_t total_ram; extern void hardware_threads_set(uint32_t n) { // Reset these to false first and set them to true when appropriate. threads_are_automatic = false; use_mt_mode_with_one_thread = false; if (n == 0) { // Automatic number of threads was requested. // If there is only one hardware thread, multi-threaded // mode will still be used if memory limit allows. threads_are_automatic = true; use_mt_mode_with_one_thread = true; // If threading support was enabled at build time, // use the number of available CPU cores. Otherwise // use one thread since disabling threading support // omits lzma_cputhreads() from liblzma. 
#ifdef MYTHREAD_ENABLED threads_max = lzma_cputhreads(); if (threads_max == 0) threads_max = 1; #else threads_max = 1; #endif } else if (n == UINT32_MAX) { use_mt_mode_with_one_thread = true; threads_max = 1; } else { threads_max = n; } return; } extern uint32_t hardware_threads_get(void) { return threads_max; } extern bool hardware_threads_is_mt(void) { #ifdef MYTHREAD_ENABLED return threads_max > 1 || use_mt_mode_with_one_thread; #else return false; #endif } extern void hardware_memlimit_set(uint64_t new_memlimit, bool set_compress, bool set_decompress, bool set_mtdec, bool is_percentage) { if (is_percentage) { assert(new_memlimit > 0); assert(new_memlimit <= 100); new_memlimit = (uint32_t)new_memlimit * total_ram / 100; } if (set_compress) { memlimit_compress = new_memlimit; #if SIZE_MAX == UINT32_MAX // FIXME? // // When running a 32-bit xz on a system with a lot of RAM and // using a percentage-based memory limit, the result can be // bigger than the 32-bit address space. Limiting the limit // below SIZE_MAX for compression (not decompression) makes // xz lower the compression settings (or number of threads) // to a level that *might* work. In practice it has worked // when using a 64-bit kernel that gives full 4 GiB address // space to 32-bit programs. In other situations this might // still be too high, like 32-bit kernels that may give much // less than 4 GiB to a single application. // // So this is an ugly hack but I will keep it here while // it does more good than bad. // // Use a value less than SIZE_MAX so that there's some room // for the xz program and so on. Don't use 4000 MiB because // it could look like someone mixed up base-2 and base-10. #ifdef __mips__ - // For MIPS32, due to architectural pecularities, + // For MIPS32, due to architectural peculiarities, // the limit is even lower. 
const uint64_t limit_max = UINT64_C(2000) << 20; #else const uint64_t limit_max = UINT64_C(4020) << 20; #endif // UINT64_MAX is a special case for the string "max" so // that has to be handled specially. if (memlimit_compress != UINT64_MAX && memlimit_compress > limit_max) memlimit_compress = limit_max; #endif } if (set_decompress) memlimit_decompress = new_memlimit; if (set_mtdec) memlimit_mtdec = new_memlimit; return; } extern uint64_t hardware_memlimit_get(enum operation_mode mode) { // 0 is a special value that indicates the default. // It disables the limit in single-threaded mode. // // NOTE: For multithreaded decompression, this is the hard limit // (memlimit_stop). hardware_memlimit_mtdec_get() gives the // soft limit (memlimit_threaded). const uint64_t memlimit = mode == MODE_COMPRESS ? memlimit_compress : memlimit_decompress; return memlimit != 0 ? memlimit : UINT64_MAX; } extern uint64_t hardware_memlimit_mtenc_get(void) { return hardware_memlimit_mtenc_is_default() ? memlimit_mt_default : hardware_memlimit_get(MODE_COMPRESS); } extern bool hardware_memlimit_mtenc_is_default(void) { return memlimit_compress == 0 && threads_are_automatic; } extern uint64_t hardware_memlimit_mtdec_get(void) { uint64_t m = memlimit_mtdec != 0 ? memlimit_mtdec : memlimit_mt_default; // Cap the value to memlimit_decompress if it has been specified. // This is nice for --info-memory. It wouldn't be needed for liblzma // since it does this anyway. if (memlimit_decompress != 0 && m > memlimit_decompress) m = memlimit_decompress; return m; } /// Helper for hardware_memlimit_show() to print one human-readable info line. static void memlimit_show(const char *str, size_t str_columns, uint64_t value) { // Calculate the field width so that str will be padded to take // str_columns on the terminal. // // NOTE: If the string is invalid, this will be -1. Using -1 as // the field width is fine here so it's not handled specially. 
const int fw = tuklib_mbstr_fw(str, (int)(str_columns)); // The memory usage limit is considered to be disabled if value // is 0 or UINT64_MAX. This might get a bit more complex once there // is threading support. See the comment in hardware_memlimit_get(). if (value == 0 || value == UINT64_MAX) printf(" %-*s %s\n", fw, str, _("Disabled")); else printf(" %-*s %s MiB (%s B)\n", fw, str, uint64_to_str(round_up_to_mib(value), 0), uint64_to_str(value, 1)); return; } extern void hardware_memlimit_show(void) { uint32_t cputhreads = 1; #ifdef MYTHREAD_ENABLED cputhreads = lzma_cputhreads(); if (cputhreads == 0) cputhreads = 1; #endif if (opt_robot) { printf("%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu32 "\n", total_ram, memlimit_compress, memlimit_decompress, hardware_memlimit_mtdec_get(), memlimit_mt_default, cputhreads); } else { const char *msgs[] = { _("Amount of physical memory (RAM):"), _("Number of processor threads:"), _("Compression:"), _("Decompression:"), _("Multi-threaded decompression:"), _("Default for -T0:"), }; size_t width_max = 1; for (unsigned i = 0; i < ARRAY_SIZE(msgs); ++i) { size_t w = tuklib_mbstr_width(msgs[i], NULL); // When debugging, catch invalid strings with // an assertion. Otherwise fallback to 1 so // that the columns just won't be aligned. assert(w != (size_t)-1); if (w == (size_t)-1) w = 1; if (width_max < w) width_max = w; } puts(_("Hardware information:")); memlimit_show(msgs[0], width_max, total_ram); printf(" %-*s %" PRIu32 "\n", tuklib_mbstr_fw(msgs[1], (int)(width_max)), msgs[1], cputhreads); putchar('\n'); puts(_("Memory usage limits:")); memlimit_show(msgs[2], width_max, memlimit_compress); memlimit_show(msgs[3], width_max, memlimit_decompress); memlimit_show(msgs[4], width_max, hardware_memlimit_mtdec_get()); memlimit_show(msgs[5], width_max, memlimit_mt_default); } tuklib_exit(E_SUCCESS, E_ERROR, message_verbosity_get() != V_SILENT); } extern void hardware_init(void) { // Get the amount of RAM. 
If we cannot determine it, // use the assumption defined by the configure script. total_ram = lzma_physmem(); if (total_ram == 0) total_ram = (uint64_t)(ASSUME_RAM) * 1024 * 1024; // FIXME? There may be better methods to determine the default value. // One Linux-specific suggestion is to use MemAvailable from // /proc/meminfo as the starting point. memlimit_mt_default = total_ram / 4; #if SIZE_MAX == UINT32_MAX // A too high value may cause 32-bit xz to run out of address space. // Use a conservative maximum value here. A few typical address space // sizes with Linux: // - x86-64 with 32-bit xz: 4 GiB // - x86: 3 GiB // - MIPS32: 2 GiB const size_t mem_ceiling = 1400U << 20; if (memlimit_mt_default > mem_ceiling) memlimit_mt_default = mem_ceiling; #endif return; } diff --git a/contrib/xz/src/xz/list.c b/contrib/xz/src/xz/list.c index 90799bd26eb4..86c3a762f560 100644 --- a/contrib/xz/src/xz/list.c +++ b/contrib/xz/src/xz/list.c @@ -1,1317 +1,1318 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file list.c /// \brief Listing information about .xz files // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. 
// /////////////////////////////////////////////////////////////////////////////// #include "private.h" #include "tuklib_integer.h" /// Information about a .xz file typedef struct { /// Combined Index of all Streams in the file lzma_index *idx; /// Total amount of Stream Padding uint64_t stream_padding; /// Highest memory usage so far uint64_t memusage_max; /// True if all Blocks so far have Compressed Size and /// Uncompressed Size fields bool all_have_sizes; /// Oldest XZ Utils version that will decompress the file uint32_t min_version; } xz_file_info; #define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 } /// Information about a .xz Block typedef struct { /// Size of the Block Header uint32_t header_size; /// A few of the Block Flags as a string char flags[3]; /// Size of the Compressed Data field in the Block lzma_vli compressed_size; /// Decoder memory usage for this Block uint64_t memusage; /// The filter chain of this Block in human-readable form char *filter_chain; } block_header_info; #define BLOCK_HEADER_INFO_INIT { .filter_chain = NULL } #define block_header_info_end(bhi) free((bhi)->filter_chain) /// Strings ending in a colon. These are used for lines like /// " Foo: 123 MiB". These are grouped because translated strings /// may have different maximum string length, and we want to pad all /// strings so that the values are aligned nicely. static const char *colon_strs[] = { N_("Streams:"), N_("Blocks:"), N_("Compressed size:"), N_("Uncompressed size:"), N_("Ratio:"), N_("Check:"), N_("Stream Padding:"), N_("Memory needed:"), N_("Sizes in headers:"), // This won't be aligned because it's so long: //N_("Minimum XZ Utils version:"), N_("Number of files:"), }; /// Enum matching the above strings. 
enum { COLON_STR_STREAMS, COLON_STR_BLOCKS, COLON_STR_COMPRESSED_SIZE, COLON_STR_UNCOMPRESSED_SIZE, COLON_STR_RATIO, COLON_STR_CHECK, COLON_STR_STREAM_PADDING, COLON_STR_MEMORY_NEEDED, COLON_STR_SIZES_IN_HEADERS, //COLON_STR_MINIMUM_XZ_VERSION, COLON_STR_NUMBER_OF_FILES, }; /// Field widths to use with printf to pad the strings to use the same number /// of columns on a terminal. static int colon_strs_fw[ARRAY_SIZE(colon_strs)]; /// Convenience macro to get the translated string and its field width /// using a COLON_STR_foo enum. #define COLON_STR(num) colon_strs_fw[num], _(colon_strs[num]) /// Column headings static struct { /// Table column heading string const char *str; /// Number of terminal-columns to use for this table-column. /// If a translated string is longer than the initial value, /// this value will be increased in init_headings(). int columns; /// Field width to use for printf() to pad "str" to use "columns" /// number of columns on a terminal. This is calculated in /// init_headings(). int fw; } headings[] = { { N_("Stream"), 6, 0 }, { N_("Block"), 9, 0 }, { N_("Blocks"), 9, 0 }, { N_("CompOffset"), 15, 0 }, { N_("UncompOffset"), 15, 0 }, { N_("CompSize"), 15, 0 }, { N_("UncompSize"), 15, 0 }, { N_("TotalSize"), 15, 0 }, { N_("Ratio"), 5, 0 }, { N_("Check"), 10, 0 }, { N_("CheckVal"), 1, 0 }, { N_("Padding"), 7, 0 }, { N_("Header"), 5, 0 }, { N_("Flags"), 2, 0 }, { N_("MemUsage"), 7 + 4, 0 }, // +4 is for " MiB" { N_("Filters"), 1, 0 }, }; /// Enum matching the above strings. 
enum { HEADING_STREAM, HEADING_BLOCK, HEADING_BLOCKS, HEADING_COMPOFFSET, HEADING_UNCOMPOFFSET, HEADING_COMPSIZE, HEADING_UNCOMPSIZE, HEADING_TOTALSIZE, HEADING_RATIO, HEADING_CHECK, HEADING_CHECKVAL, HEADING_PADDING, HEADING_HEADERSIZE, HEADING_HEADERFLAGS, HEADING_MEMUSAGE, HEADING_FILTERS, }; #define HEADING_STR(num) headings[num].fw, _(headings[num].str) /// Check ID to string mapping static const char check_names[LZMA_CHECK_ID_MAX + 1][12] = { // TRANSLATORS: Indicates that there is no integrity check. // This string is used in tables. In older xz version this // string was limited to ten columns in a fixed-width font, but // nowadays there is no strict length restriction anymore. N_("None"), "CRC32", // TRANSLATORS: Indicates that integrity check name is not known, // but the Check ID is known (here 2). In older xz version these // strings were limited to ten columns in a fixed-width font, but // nowadays there is no strict length restriction anymore. N_("Unknown-2"), N_("Unknown-3"), "CRC64", N_("Unknown-5"), N_("Unknown-6"), N_("Unknown-7"), N_("Unknown-8"), N_("Unknown-9"), "SHA-256", N_("Unknown-11"), N_("Unknown-12"), N_("Unknown-13"), N_("Unknown-14"), N_("Unknown-15"), }; /// Buffer size for get_check_names(). This may be a bit ridiculous, /// but at least it's enough if some language needs many multibyte chars. #define CHECKS_STR_SIZE 1024 /// Value of the Check field as hexadecimal string. /// This is set by parse_check_value(). static char check_value[2 * LZMA_CHECK_SIZE_MAX + 1]; /// Totals that are displayed if there was more than one file. /// The "files" counter is also used in print_info_adv() to show /// the file number. static struct { uint64_t files; uint64_t streams; uint64_t blocks; uint64_t compressed_size; uint64_t uncompressed_size; uint64_t stream_padding; uint64_t memusage_max; uint32_t checks; uint32_t min_version; bool all_have_sizes; } totals = { 0, 0, 0, 0, 0, 0, 0, 0, 50000002, true }; /// Initialize colon_strs_fw[]. 
static void init_colon_strs(void) { // Lengths of translated strings as bytes. size_t lens[ARRAY_SIZE(colon_strs)]; // Lengths of translated strings as columns. size_t widths[ARRAY_SIZE(colon_strs)]; // Maximum number of columns needed by a translated string. size_t width_max = 0; for (unsigned i = 0; i < ARRAY_SIZE(colon_strs); ++i) { widths[i] = tuklib_mbstr_width(_(colon_strs[i]), &lens[i]); // If debugging is enabled, catch invalid strings with // an assertion. However, when not debugging, use the // byte count as the fallback width. This shouldn't // ever happen unless there is a bad string in the // translations, but in such case I guess it's better // to try to print something useful instead of failing // completely. assert(widths[i] != (size_t)-1); if (widths[i] == (size_t)-1) widths[i] = lens[i]; if (widths[i] > width_max) width_max = widths[i]; } // Calculate the field width for printf("%*s") so that the strings // will use width_max columns on a terminal. for (unsigned i = 0; i < ARRAY_SIZE(colon_strs); ++i) colon_strs_fw[i] = (int)(lens[i] + width_max - widths[i]); return; } /// Initialize headings[]. static void init_headings(void) { // Before going through the heading strings themselves, treat // the Check heading specially: Look at the widths of the various // check names and increase the width of the Check column if needed. // The width of the heading name "Check" will then be handled normally // with other heading names in the second loop in this function. for (unsigned i = 0; i < ARRAY_SIZE(check_names); ++i) { size_t len; size_t w = tuklib_mbstr_width(_(check_names[i]), &len); // Error handling like in init_colon_strs(). assert(w != (size_t)-1); if (w == (size_t)-1) w = len; // If the translated string is wider than the minimum width // set at compile time, increase the width. 
if ((size_t)(headings[HEADING_CHECK].columns) < w) headings[HEADING_CHECK].columns = (int)w; } for (unsigned i = 0; i < ARRAY_SIZE(headings); ++i) { size_t len; size_t w = tuklib_mbstr_width(_(headings[i].str), &len); // Error handling like in init_colon_strs(). assert(w != (size_t)-1); if (w == (size_t)-1) w = len; // If the translated string is wider than the minimum width // set at compile time, increase the width. if ((size_t)(headings[i].columns) < w) headings[i].columns = (int)w; // Calculate the field width for printf("%*s") so that // the string uses .columns number of columns on a terminal. headings[i].fw = (int)(len + (size_t)headings[i].columns - w); } return; } /// Initialize the printf field widths that are needed to get nicely aligned /// output with translated strings. static void init_field_widths(void) { init_colon_strs(); init_headings(); return; } /// Convert XZ Utils version number to a string. static const char * xz_ver_to_str(uint32_t ver) { static char buf[32]; unsigned int major = ver / 10000000U; ver -= major * 10000000U; unsigned int minor = ver / 10000U; ver -= minor * 10000U; unsigned int patch = ver / 10U; ver -= patch * 10U; const char *stability = ver == 0 ? "alpha" : ver == 1 ? "beta" : ""; snprintf(buf, sizeof(buf), "%u.%u.%u%s", major, minor, patch, stability); return buf; } /// \brief Parse the Index(es) from the given .xz file /// /// \param xfi Pointer to structure where the decoded information /// is stored. /// \param pair Input file /// /// \return On success, false is returned. On error, true is returned. 
/// static bool parse_indexes(xz_file_info *xfi, file_pair *pair) { if (pair->src_st.st_size <= 0) { message_error(_("%s: File is empty"), pair->src_name); return true; } if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { message_error(_("%s: Too small to be a valid .xz file"), pair->src_name); return true; } io_buf buf; lzma_stream strm = LZMA_STREAM_INIT; lzma_index *idx = NULL; lzma_ret ret = lzma_file_info_decoder(&strm, &idx, hardware_memlimit_get(MODE_LIST), (uint64_t)(pair->src_st.st_size)); if (ret != LZMA_OK) { - message_error("%s: %s", pair->src_name, message_strm(ret)); + message_error(_("%s: %s"), pair->src_name, message_strm(ret)); return true; } while (true) { if (strm.avail_in == 0) { strm.next_in = buf.u8; strm.avail_in = io_read(pair, &buf, IO_BUFFER_SIZE); if (strm.avail_in == SIZE_MAX) goto error; } ret = lzma_code(&strm, LZMA_RUN); switch (ret) { case LZMA_OK: break; case LZMA_SEEK_NEEDED: // liblzma won't ask us to seek past the known size // of the input file. assert(strm.seek_pos <= (uint64_t)(pair->src_st.st_size)); if (io_seek_src(pair, strm.seek_pos)) goto error; // avail_in must be zero so that we will read new // input. strm.avail_in = 0; break; case LZMA_STREAM_END: { lzma_end(&strm); xfi->idx = idx; // Calculate xfi->stream_padding. lzma_index_iter iter; lzma_index_iter_init(&iter, xfi->idx); while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) xfi->stream_padding += iter.stream.padding; return false; } default: - message_error("%s: %s", pair->src_name, + message_error(_("%s: %s"), pair->src_name, message_strm(ret)); // If the error was too low memory usage limit, // show also how much memory would have been needed. if (ret == LZMA_MEMLIMIT_ERROR) message_mem_needed(V_ERROR, lzma_memusage(&strm)); goto error; } } error: lzma_end(&strm); return true; } /// \brief Parse the Block Header /// /// The result is stored into *bhi. The caller takes care of initializing it. /// /// \return False on success, true on error. 
static bool parse_block_header(file_pair *pair, const lzma_index_iter *iter, block_header_info *bhi, xz_file_info *xfi) { #if IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX # error IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX #endif // Get the whole Block Header with one read, but don't read past // the end of the Block (or even its Check field). const uint32_t size = my_min(iter->block.total_size - lzma_check_size(iter->stream.flags->check), LZMA_BLOCK_HEADER_SIZE_MAX); io_buf buf; if (io_pread(pair, &buf, size, iter->block.compressed_file_offset)) return true; // Zero would mean Index Indicator and thus not a valid Block. if (buf.u8[0] == 0) goto data_error; // Initialize the block structure and decode Block Header Size. lzma_filter filters[LZMA_FILTERS_MAX + 1]; lzma_block block; block.version = 0; block.check = iter->stream.flags->check; block.filters = filters; block.header_size = lzma_block_header_size_decode(buf.u8[0]); if (block.header_size > size) goto data_error; // Decode the Block Header. switch (lzma_block_header_decode(&block, NULL, buf.u8)) { case LZMA_OK: break; case LZMA_OPTIONS_ERROR: - message_error("%s: %s", pair->src_name, + message_error(_("%s: %s"), pair->src_name, message_strm(LZMA_OPTIONS_ERROR)); return true; case LZMA_DATA_ERROR: goto data_error; default: message_bug(); } // Check the Block Flags. These must be done before calling // lzma_block_compressed_size(), because it overwrites // block.compressed_size. // // NOTE: If you add new characters here, update the minimum number of // columns in headings[HEADING_HEADERFLAGS] to match the number of // characters used here. bhi->flags[0] = block.compressed_size != LZMA_VLI_UNKNOWN ? 'c' : '-'; bhi->flags[1] = block.uncompressed_size != LZMA_VLI_UNKNOWN ? 'u' : '-'; bhi->flags[2] = '\0'; // Collect information if all Blocks have both Compressed Size // and Uncompressed Size fields. They can be useful e.g. for // multi-threaded decompression so it can be useful to know it. 
xfi->all_have_sizes &= block.compressed_size != LZMA_VLI_UNKNOWN && block.uncompressed_size != LZMA_VLI_UNKNOWN; // Validate or set block.compressed_size. switch (lzma_block_compressed_size(&block, iter->block.unpadded_size)) { case LZMA_OK: // Validate also block.uncompressed_size if it is present. // If it isn't present, there's no need to set it since // we aren't going to actually decompress the Block; if // we were decompressing, then we should set it so that // the Block decoder could validate the Uncompressed Size // that was stored in the Index. if (block.uncompressed_size == LZMA_VLI_UNKNOWN || block.uncompressed_size == iter->block.uncompressed_size) break; // If the above fails, the file is corrupt so // LZMA_DATA_ERROR is a good error code. // Fall through case LZMA_DATA_ERROR: // Free the memory allocated by lzma_block_header_decode(). lzma_filters_free(filters, NULL); goto data_error; default: message_bug(); } // Copy the known sizes. bhi->header_size = block.header_size; bhi->compressed_size = block.compressed_size; // Calculate the decoder memory usage and update the maximum // memory usage of this Block. bhi->memusage = lzma_raw_decoder_memusage(filters); if (xfi->memusage_max < bhi->memusage) xfi->memusage_max = bhi->memusage; // Determine the minimum XZ Utils version that supports this Block. // // - ARM64 filter needs 5.4.0. // // - 5.0.0 doesn't support empty LZMA2 streams and thus empty // Blocks that use LZMA2. This decoder bug was fixed in 5.0.2. if (xfi->min_version < 50040002U) { for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { if (filters[i].id == LZMA_FILTER_ARM64) { xfi->min_version = 50040002U; break; } } } if (xfi->min_version < 50000022U) { size_t i = 0; while (filters[i + 1].id != LZMA_VLI_UNKNOWN) ++i; if (filters[i].id == LZMA_FILTER_LZMA2 && iter->block.uncompressed_size == 0) xfi->min_version = 50000022U; } // Convert the filter chain to human readable form. 
const lzma_ret str_ret = lzma_str_from_filters( &bhi->filter_chain, filters, LZMA_STR_DECODER | LZMA_STR_GETOPT_LONG, NULL); // Free the memory allocated by lzma_block_header_decode(). lzma_filters_free(filters, NULL); // Check if the stringification succeeded. if (str_ret != LZMA_OK) { - message_error("%s: %s", pair->src_name, message_strm(str_ret)); + message_error(_("%s: %s"), pair->src_name, + message_strm(str_ret)); return true; } return false; data_error: // Show the error message. - message_error("%s: %s", pair->src_name, + message_error(_("%s: %s"), pair->src_name, message_strm(LZMA_DATA_ERROR)); return true; } /// \brief Parse the Check field and put it into check_value[] /// /// \return False on success, true on error. static bool parse_check_value(file_pair *pair, const lzma_index_iter *iter) { // Don't read anything from the file if there is no integrity Check. if (iter->stream.flags->check == LZMA_CHECK_NONE) { snprintf(check_value, sizeof(check_value), "---"); return false; } // Locate and read the Check field. const uint32_t size = lzma_check_size(iter->stream.flags->check); const uint64_t offset = iter->block.compressed_file_offset + iter->block.total_size - size; io_buf buf; if (io_pread(pair, &buf, size, offset)) return true; // CRC32 and CRC64 are in little endian. Guess that all the future // 32-bit and 64-bit Check values are little endian too. It shouldn't // be a too big problem if this guess is wrong. if (size == 4) snprintf(check_value, sizeof(check_value), "%08" PRIx32, conv32le(buf.u32[0])); else if (size == 8) snprintf(check_value, sizeof(check_value), "%016" PRIx64, conv64le(buf.u64[0])); else for (size_t i = 0; i < size; ++i) snprintf(check_value + i * 2, 3, "%02x", buf.u8[i]); return false; } /// \brief Parse detailed information about a Block /// /// Since this requires seek(s), listing information about all Blocks can /// be slow. 
/// /// \param pair Input file /// \param iter Location of the Block whose Check value should /// be printed. /// \param bhi Pointer to structure where to store the information /// about the Block Header field. /// /// \return False on success, true on error. If an error occurs, /// the error message is printed too so the caller doesn't /// need to worry about that. static bool parse_details(file_pair *pair, const lzma_index_iter *iter, block_header_info *bhi, xz_file_info *xfi) { if (parse_block_header(pair, iter, bhi, xfi)) return true; if (parse_check_value(pair, iter)) return true; return false; } /// \brief Get the compression ratio /// /// This has slightly different format than that is used in message.c. static const char * get_ratio(uint64_t compressed_size, uint64_t uncompressed_size) { if (uncompressed_size == 0) return "---"; const double ratio = (double)(compressed_size) / (double)(uncompressed_size); if (ratio > 9.999) return "---"; static char buf[16]; snprintf(buf, sizeof(buf), "%.3f", ratio); return buf; } /// \brief Get a comma-separated list of Check names /// /// The check names are translated with gettext except when in robot mode. /// /// \param buf Buffer to hold the resulting string /// \param checks Bit mask of Checks to print /// \param space_after_comma /// It's better to not use spaces in table-like listings, /// but in more verbose formats a space after a comma /// is good for readability. static void get_check_names(char buf[CHECKS_STR_SIZE], uint32_t checks, bool space_after_comma) { // If we get called when there are no Checks to print, set checks // to 1 so that we print "None". This can happen in the robot mode // when printing the totals line if there are no valid input files. if (checks == 0) checks = 1; char *pos = buf; size_t left = CHECKS_STR_SIZE; const char *sep = space_after_comma ? 
", " : ","; bool comma = false; for (size_t i = 0; i <= LZMA_CHECK_ID_MAX; ++i) { if (checks & (UINT32_C(1) << i)) { my_snprintf(&pos, &left, "%s%s", comma ? sep : "", opt_robot ? check_names[i] : _(check_names[i])); comma = true; } } return; } static bool print_info_basic(const xz_file_info *xfi, file_pair *pair) { static bool headings_displayed = false; if (!headings_displayed) { headings_displayed = true; // TRANSLATORS: These are column headings. From Strms (Streams) // to Ratio, the columns are right aligned. Check and Filename // are left aligned. If you need longer words, it's OK to // use two lines here. Test with "xz -l foo.xz". puts(_("Strms Blocks Compressed Uncompressed Ratio " "Check Filename")); } char checks[CHECKS_STR_SIZE]; get_check_names(checks, lzma_index_checks(xfi->idx), false); const char *cols[7] = { uint64_to_str(lzma_index_stream_count(xfi->idx), 0), uint64_to_str(lzma_index_block_count(xfi->idx), 1), uint64_to_nicestr(lzma_index_file_size(xfi->idx), NICESTR_B, NICESTR_TIB, false, 2), uint64_to_nicestr(lzma_index_uncompressed_size(xfi->idx), NICESTR_B, NICESTR_TIB, false, 3), get_ratio(lzma_index_file_size(xfi->idx), lzma_index_uncompressed_size(xfi->idx)), checks, pair->src_name, }; printf("%*s %*s %*s %*s %*s %-*s %s\n", tuklib_mbstr_fw(cols[0], 5), cols[0], tuklib_mbstr_fw(cols[1], 7), cols[1], tuklib_mbstr_fw(cols[2], 11), cols[2], tuklib_mbstr_fw(cols[3], 11), cols[3], tuklib_mbstr_fw(cols[4], 5), cols[4], tuklib_mbstr_fw(cols[5], 7), cols[5], cols[6]); return false; } static void print_adv_helper(uint64_t stream_count, uint64_t block_count, uint64_t compressed_size, uint64_t uncompressed_size, uint32_t checks, uint64_t stream_padding) { char checks_str[CHECKS_STR_SIZE]; get_check_names(checks_str, checks, true); printf(" %-*s %s\n", COLON_STR(COLON_STR_STREAMS), uint64_to_str(stream_count, 0)); printf(" %-*s %s\n", COLON_STR(COLON_STR_BLOCKS), uint64_to_str(block_count, 0)); printf(" %-*s %s\n", COLON_STR(COLON_STR_COMPRESSED_SIZE), 
uint64_to_nicestr(compressed_size, NICESTR_B, NICESTR_TIB, true, 0)); printf(" %-*s %s\n", COLON_STR(COLON_STR_UNCOMPRESSED_SIZE), uint64_to_nicestr(uncompressed_size, NICESTR_B, NICESTR_TIB, true, 0)); printf(" %-*s %s\n", COLON_STR(COLON_STR_RATIO), get_ratio(compressed_size, uncompressed_size)); printf(" %-*s %s\n", COLON_STR(COLON_STR_CHECK), checks_str); printf(" %-*s %s\n", COLON_STR(COLON_STR_STREAM_PADDING), uint64_to_nicestr(stream_padding, NICESTR_B, NICESTR_TIB, true, 0)); return; } static bool print_info_adv(xz_file_info *xfi, file_pair *pair) { // Print the overall information. print_adv_helper(lzma_index_stream_count(xfi->idx), lzma_index_block_count(xfi->idx), lzma_index_file_size(xfi->idx), lzma_index_uncompressed_size(xfi->idx), lzma_index_checks(xfi->idx), xfi->stream_padding); // Size of the biggest Check. This is used to calculate the width // of the CheckVal field. The table would get insanely wide if // we always reserved space for 64-byte Check (128 chars as hex). uint32_t check_max = 0; // Print information about the Streams. // // All except Check are right aligned; Check is left aligned. // Test with "xz -lv foo.xz". 
printf(" %s\n %*s %*s %*s %*s %*s %*s %*s %-*s %*s\n", _(colon_strs[COLON_STR_STREAMS]), HEADING_STR(HEADING_STREAM), HEADING_STR(HEADING_BLOCKS), HEADING_STR(HEADING_COMPOFFSET), HEADING_STR(HEADING_UNCOMPOFFSET), HEADING_STR(HEADING_COMPSIZE), HEADING_STR(HEADING_UNCOMPSIZE), HEADING_STR(HEADING_RATIO), HEADING_STR(HEADING_CHECK), HEADING_STR(HEADING_PADDING)); lzma_index_iter iter; lzma_index_iter_init(&iter, xfi->idx); while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) { const char *cols1[4] = { uint64_to_str(iter.stream.number, 0), uint64_to_str(iter.stream.block_count, 1), uint64_to_str(iter.stream.compressed_offset, 2), uint64_to_str(iter.stream.uncompressed_offset, 3), }; printf(" %*s %*s %*s %*s ", tuklib_mbstr_fw(cols1[0], headings[HEADING_STREAM].columns), cols1[0], tuklib_mbstr_fw(cols1[1], headings[HEADING_BLOCKS].columns), cols1[1], tuklib_mbstr_fw(cols1[2], headings[HEADING_COMPOFFSET].columns), cols1[2], tuklib_mbstr_fw(cols1[3], headings[HEADING_UNCOMPOFFSET].columns), cols1[3]); const char *cols2[5] = { uint64_to_str(iter.stream.compressed_size, 0), uint64_to_str(iter.stream.uncompressed_size, 1), get_ratio(iter.stream.compressed_size, iter.stream.uncompressed_size), _(check_names[iter.stream.flags->check]), uint64_to_str(iter.stream.padding, 2), }; printf("%*s %*s %*s %-*s %*s\n", tuklib_mbstr_fw(cols2[0], headings[HEADING_COMPSIZE].columns), cols2[0], tuklib_mbstr_fw(cols2[1], headings[HEADING_UNCOMPSIZE].columns), cols2[1], tuklib_mbstr_fw(cols2[2], headings[HEADING_RATIO].columns), cols2[2], tuklib_mbstr_fw(cols2[3], headings[HEADING_CHECK].columns), cols2[3], tuklib_mbstr_fw(cols2[4], headings[HEADING_PADDING].columns), cols2[4]); // Update the maximum Check size. if (lzma_check_size(iter.stream.flags->check) > check_max) check_max = lzma_check_size(iter.stream.flags->check); } // Cache the verbosity level to a local variable. 
const bool detailed = message_verbosity_get() >= V_DEBUG; // Print information about the Blocks but only if there is // at least one Block. if (lzma_index_block_count(xfi->idx) > 0) { // Calculate the width of the CheckVal column. This can be // used as is as the field width for printf() when printing // the actual check value as it is hexadecimal. However, to // print the column heading, further calculation is needed // to handle a translated string (it's done a few lines later). assert(check_max <= LZMA_CHECK_SIZE_MAX); const int checkval_width = my_max( headings[HEADING_CHECKVAL].columns, (int)(2 * check_max)); // All except Check are right aligned; Check is left aligned. printf(" %s\n %*s %*s %*s %*s %*s %*s %*s %-*s", _(colon_strs[COLON_STR_BLOCKS]), HEADING_STR(HEADING_STREAM), HEADING_STR(HEADING_BLOCK), HEADING_STR(HEADING_COMPOFFSET), HEADING_STR(HEADING_UNCOMPOFFSET), HEADING_STR(HEADING_TOTALSIZE), HEADING_STR(HEADING_UNCOMPSIZE), HEADING_STR(HEADING_RATIO), detailed ? headings[HEADING_CHECK].fw : 1, _(headings[HEADING_CHECK].str)); if (detailed) { // CheckVal (Check value), Flags, and Filters are // left aligned. Block Header Size, CompSize, and // MemUsage are right aligned. Test with // "xz -lvv foo.xz". printf(" %-*s %*s %-*s %*s %*s %s", headings[HEADING_CHECKVAL].fw + checkval_width - headings[HEADING_CHECKVAL].columns, _(headings[HEADING_CHECKVAL].str), HEADING_STR(HEADING_HEADERSIZE), HEADING_STR(HEADING_HEADERFLAGS), HEADING_STR(HEADING_COMPSIZE), HEADING_STR(HEADING_MEMUSAGE), _(headings[HEADING_FILTERS].str)); } putchar('\n'); lzma_index_iter_init(&iter, xfi->idx); // Iterate over the Blocks. while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { // If in detailed mode, collect the information from // Block Header before starting to print the next line. 
block_header_info bhi = BLOCK_HEADER_INFO_INIT; if (detailed && parse_details(pair, &iter, &bhi, xfi)) return true; const char *cols1[4] = { uint64_to_str(iter.stream.number, 0), uint64_to_str( iter.block.number_in_stream, 1), uint64_to_str( iter.block.compressed_file_offset, 2), uint64_to_str( iter.block.uncompressed_file_offset, 3) }; printf(" %*s %*s %*s %*s ", tuklib_mbstr_fw(cols1[0], headings[HEADING_STREAM].columns), cols1[0], tuklib_mbstr_fw(cols1[1], headings[HEADING_BLOCK].columns), cols1[1], tuklib_mbstr_fw(cols1[2], headings[HEADING_COMPOFFSET].columns), cols1[2], tuklib_mbstr_fw(cols1[3], headings[ HEADING_UNCOMPOFFSET].columns), cols1[3]); const char *cols2[4] = { uint64_to_str(iter.block.total_size, 0), uint64_to_str(iter.block.uncompressed_size, 1), get_ratio(iter.block.total_size, iter.block.uncompressed_size), _(check_names[iter.stream.flags->check]) }; printf("%*s %*s %*s %-*s", tuklib_mbstr_fw(cols2[0], headings[HEADING_TOTALSIZE].columns), cols2[0], tuklib_mbstr_fw(cols2[1], headings[HEADING_UNCOMPSIZE].columns), cols2[1], tuklib_mbstr_fw(cols2[2], headings[HEADING_RATIO].columns), cols2[2], tuklib_mbstr_fw(cols2[3], detailed ? headings[HEADING_CHECK].columns : 1), cols2[3]); if (detailed) { const lzma_vli compressed_size = iter.block.unpadded_size - bhi.header_size - lzma_check_size( iter.stream.flags->check); const char *cols3[6] = { check_value, uint64_to_str(bhi.header_size, 0), bhi.flags, uint64_to_str(compressed_size, 1), uint64_to_str( round_up_to_mib(bhi.memusage), 2), bhi.filter_chain }; // Show MiB for memory usage, because it // is the only size which is not in bytes. 
printf(" %-*s %*s %-*s %*s %*s MiB %s", checkval_width, cols3[0], tuklib_mbstr_fw(cols3[1], headings[ HEADING_HEADERSIZE].columns), cols3[1], tuklib_mbstr_fw(cols3[2], headings[ HEADING_HEADERFLAGS].columns), cols3[2], tuklib_mbstr_fw(cols3[3], headings[ HEADING_COMPSIZE].columns), cols3[3], tuklib_mbstr_fw(cols3[4], headings[ HEADING_MEMUSAGE].columns - 4), cols3[4], cols3[5]); } putchar('\n'); block_header_info_end(&bhi); } } if (detailed) { printf(" %-*s %s MiB\n", COLON_STR(COLON_STR_MEMORY_NEEDED), uint64_to_str( round_up_to_mib(xfi->memusage_max), 0)); printf(" %-*s %s\n", COLON_STR(COLON_STR_SIZES_IN_HEADERS), xfi->all_have_sizes ? _("Yes") : _("No")); //printf(" %-*s %s\n", COLON_STR(COLON_STR_MINIMUM_XZ_VERSION), printf(_(" Minimum XZ Utils version: %s\n"), xz_ver_to_str(xfi->min_version)); } return false; } static bool print_info_robot(xz_file_info *xfi, file_pair *pair) { char checks[CHECKS_STR_SIZE]; get_check_names(checks, lzma_index_checks(xfi->idx), false); printf("name\t%s\n", pair->src_name); printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s\t%" PRIu64 "\n", lzma_index_stream_count(xfi->idx), lzma_index_block_count(xfi->idx), lzma_index_file_size(xfi->idx), lzma_index_uncompressed_size(xfi->idx), get_ratio(lzma_index_file_size(xfi->idx), lzma_index_uncompressed_size(xfi->idx)), checks, xfi->stream_padding); if (message_verbosity_get() >= V_VERBOSE) { lzma_index_iter iter; lzma_index_iter_init(&iter, xfi->idx); while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) printf("stream\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s\t%" PRIu64 "\n", iter.stream.number, iter.stream.block_count, iter.stream.compressed_offset, iter.stream.uncompressed_offset, iter.stream.compressed_size, iter.stream.uncompressed_size, get_ratio(iter.stream.compressed_size, iter.stream.uncompressed_size), check_names[iter.stream.flags->check], iter.stream.padding); lzma_index_iter_rewind(&iter); while 
(!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { block_header_info bhi = BLOCK_HEADER_INFO_INIT; if (message_verbosity_get() >= V_DEBUG && parse_details( pair, &iter, &bhi, xfi)) return true; printf("block\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s", iter.stream.number, iter.block.number_in_stream, iter.block.number_in_file, iter.block.compressed_file_offset, iter.block.uncompressed_file_offset, iter.block.total_size, iter.block.uncompressed_size, get_ratio(iter.block.total_size, iter.block.uncompressed_size), check_names[iter.stream.flags->check]); if (message_verbosity_get() >= V_DEBUG) printf("\t%s\t%" PRIu32 "\t%s\t%" PRIu64 "\t%" PRIu64 "\t%s", check_value, bhi.header_size, bhi.flags, bhi.compressed_size, bhi.memusage, bhi.filter_chain); putchar('\n'); block_header_info_end(&bhi); } } if (message_verbosity_get() >= V_DEBUG) printf("summary\t%" PRIu64 "\t%s\t%" PRIu32 "\n", xfi->memusage_max, xfi->all_have_sizes ? "yes" : "no", xfi->min_version); return false; } static void update_totals(const xz_file_info *xfi) { // TODO: Integer overflow checks ++totals.files; totals.streams += lzma_index_stream_count(xfi->idx); totals.blocks += lzma_index_block_count(xfi->idx); totals.compressed_size += lzma_index_file_size(xfi->idx); totals.uncompressed_size += lzma_index_uncompressed_size(xfi->idx); totals.stream_padding += xfi->stream_padding; totals.checks |= lzma_index_checks(xfi->idx); if (totals.memusage_max < xfi->memusage_max) totals.memusage_max = xfi->memusage_max; if (totals.min_version < xfi->min_version) totals.min_version = xfi->min_version; totals.all_have_sizes &= xfi->all_have_sizes; return; } static void print_totals_basic(void) { // Print a separator line. char line[80]; memset(line, '-', sizeof(line)); line[sizeof(line) - 1] = '\0'; puts(line); // Get the check names. 
char checks[CHECKS_STR_SIZE]; get_check_names(checks, totals.checks, false); // Print the totals except the file count, which needs // special handling. printf("%5s %7s %11s %11s %5s %-7s ", uint64_to_str(totals.streams, 0), uint64_to_str(totals.blocks, 1), uint64_to_nicestr(totals.compressed_size, NICESTR_B, NICESTR_TIB, false, 2), uint64_to_nicestr(totals.uncompressed_size, NICESTR_B, NICESTR_TIB, false, 3), get_ratio(totals.compressed_size, totals.uncompressed_size), checks); // Since we print totals only when there are at least two files, // the English message will always use "%s files". But some other // languages need different forms for different plurals so we // have to translate this with ngettext(). // // TRANSLATORS: %s is an integer. Only the plural form of this // message is used (e.g. "2 files"). Test with "xz -l foo.xz bar.xz". printf(ngettext("%s file\n", "%s files\n", totals.files <= ULONG_MAX ? totals.files : (totals.files % 1000000) + 1000000), uint64_to_str(totals.files, 0)); return; } static void print_totals_adv(void) { putchar('\n'); puts(_("Totals:")); printf(" %-*s %s\n", COLON_STR(COLON_STR_NUMBER_OF_FILES), uint64_to_str(totals.files, 0)); print_adv_helper(totals.streams, totals.blocks, totals.compressed_size, totals.uncompressed_size, totals.checks, totals.stream_padding); if (message_verbosity_get() >= V_DEBUG) { printf(" %-*s %s MiB\n", COLON_STR(COLON_STR_MEMORY_NEEDED), uint64_to_str( round_up_to_mib(totals.memusage_max), 0)); printf(" %-*s %s\n", COLON_STR(COLON_STR_SIZES_IN_HEADERS), totals.all_have_sizes ? 
_("Yes") : _("No")); //printf(" %-*s %s\n", COLON_STR(COLON_STR_MINIMUM_XZ_VERSION), printf(_(" Minimum XZ Utils version: %s\n"), xz_ver_to_str(totals.min_version)); } return; } static void print_totals_robot(void) { char checks[CHECKS_STR_SIZE]; get_check_names(checks, totals.checks, false); printf("totals\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s\t%" PRIu64 "\t%" PRIu64, totals.streams, totals.blocks, totals.compressed_size, totals.uncompressed_size, get_ratio(totals.compressed_size, totals.uncompressed_size), checks, totals.stream_padding, totals.files); if (message_verbosity_get() >= V_DEBUG) printf("\t%" PRIu64 "\t%s\t%" PRIu32, totals.memusage_max, totals.all_have_sizes ? "yes" : "no", totals.min_version); putchar('\n'); return; } extern void list_totals(void) { if (opt_robot) { // Always print totals in --robot mode. It can be convenient // in some cases and doesn't complicate usage of the // single-file case much. print_totals_robot(); } else if (totals.files > 1) { // For non-robot mode, totals are printed only if there // is more than one file. if (message_verbosity_get() <= V_WARNING) print_totals_basic(); else print_totals_adv(); } return; } extern void list_file(const char *filename) { if (opt_format != FORMAT_XZ && opt_format != FORMAT_AUTO) message_fatal(_("--list works only on .xz files " "(--format=xz or --format=auto)")); message_filename(filename); if (filename == stdin_filename) { message_error(_("--list does not support reading from " "standard input")); return; } init_field_widths(); // Unset opt_stdout so that io_open_src() won't accept special files. // Set opt_force so that io_open_src() will follow symlinks. 
opt_stdout = false; opt_force = true; file_pair *pair = io_open_src(filename); if (pair == NULL) return; xz_file_info xfi = XZ_FILE_INFO_INIT; if (!parse_indexes(&xfi, pair)) { bool fail; // We have three main modes: // - --robot, which has submodes if --verbose is specified // once or twice // - Normal --list without --verbose // - --list with one or two --verbose if (opt_robot) fail = print_info_robot(&xfi, pair); else if (message_verbosity_get() <= V_WARNING) fail = print_info_basic(&xfi, pair); else fail = print_info_adv(&xfi, pair); // Update the totals that are displayed after all // the individual files have been listed. Don't count // broken files. if (!fail) update_totals(&xfi); lzma_index_end(xfi.idx, NULL); } io_close(pair, false); return; } diff --git a/contrib/xz/src/xz/message.c b/contrib/xz/src/xz/message.c index c54ebc5b5800..abf30adcc26c 100644 --- a/contrib/xz/src/xz/message.c +++ b/contrib/xz/src/xz/message.c @@ -1,1146 +1,1146 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file message.c /// \brief Printing messages // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "private.h" #include /// Number of the current file static unsigned int files_pos = 0; /// Total number of input files; zero if unknown. static unsigned int files_total; /// Verbosity level static enum message_verbosity verbosity = V_WARNING; /// Filename which we will print with the verbose messages static const char *filename; /// True once the a filename has been printed to stderr as part of progress /// message. If automatic progress updating isn't enabled, this becomes true /// after the first progress message has been printed due to user sending /// SIGINFO, SIGUSR1, or SIGALRM. 
/// Once this variable is true, we will print
/// an empty line before the next filename to make the output more readable.
/// print_filename() is the function that both checks and sets it.
static bool first_filename_printed = false;

/// This is set to true when print_filename() has printed the current
/// filename (to stderr, or to stdout in --list mode). This variable is
/// useful only if not updating progress automatically: if the user sends
/// many SIGINFO, SIGUSR1, or SIGALRM signals, we won't print the name of
/// the same file multiple times. message_filename() resets it to false for
/// a new file whose name isn't printed right away.
static bool current_filename_printed = false;

/// True if we should print the progress indicator and update it
/// automatically, provided that verbosity >= V_VERBOSE as well.
/// message_init() sets this from isatty(STDERR_FILENO).
static bool progress_automatic;

/// True if message_progress_start() has been called but
/// message_progress_end() hasn't been called yet.
static bool progress_started = false;

/// This is true when a progress message was printed and the cursor is still
/// on the same line with the progress message. In that case, a newline has
/// to be printed before any error messages.
static bool progress_active = false;

/// Pointer to the lzma_stream used to do the encoding or decoding.
/// Stored by message_progress_start().
static lzma_stream *progress_strm;

/// This is true if we are in passthru mode (not actually compressing or
/// decompressing) and thus cannot use lzma_get_progress(progress_strm, ...).
/// That is, we are using coder_passthru() in coder.c.
static bool progress_is_from_passthru;

/// Expected size of the input stream. It is needed to show the completion
/// percentage and to estimate the remaining time. Zero is treated as
/// "size unknown" by the code that formats those fields.
static uint64_t expected_in_size;


// Use alarm() and SIGALRM when they are supported. This has two minor
// advantages over the alternative of polling the elapsed time:
//   - It is possible for the user to send SIGINFO, SIGUSR1, or SIGALRM to
//     get intermediate progress information even when --verbose wasn't used
//     or stderr is not a terminal.
//   - alarm() + SIGALRM seems to have slightly less overhead than polling
//     gettimeofday().
#ifdef SIGALRM const int message_progress_sigs[] = { SIGALRM, #ifdef SIGINFO SIGINFO, #endif #ifdef SIGUSR1 SIGUSR1, #endif 0 }; /// The signal handler for SIGALRM sets this to true. It is set back to false /// once the progress message has been updated. static volatile sig_atomic_t progress_needs_updating = false; /// Signal handler for SIGALRM static void progress_signal_handler(int sig lzma_attribute((__unused__))) { progress_needs_updating = true; return; } #else /// This is true when progress message printing is wanted. Using the same /// variable name as above to avoid some ifdefs. static bool progress_needs_updating = false; /// Elapsed time when the next progress message update should be done. static uint64_t progress_next_update; #endif extern void message_init(void) { // If --verbose is used, we use a progress indicator if and only // if stderr is a terminal. If stderr is not a terminal, we print // verbose information only after finishing the file. As a special // exception, even if --verbose was not used, user can send SIGALRM // to make us print progress information once without automatic // updating. progress_automatic = isatty(STDERR_FILENO); // Commented out because COLUMNS is rarely exported to environment. // Most users have at least 80 columns anyway, let's think something // fancy here if enough people complain. /* if (progress_automatic) { // stderr is a terminal. Check the COLUMNS environment // variable to see if the terminal is wide enough. If COLUMNS // doesn't exist or it has some unparsable value, we assume // that the terminal is wide enough. const char *columns_str = getenv("COLUMNS"); if (columns_str != NULL) { char *endptr; const long columns = strtol(columns_str, &endptr, 10); if (*endptr != '\0' || columns < 80) progress_automatic = false; } } */ #ifdef SIGALRM // Establish the signal handlers which set a flag to tell us that // progress info should be updated. 
struct sigaction sa; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; sa.sa_handler = &progress_signal_handler; for (size_t i = 0; message_progress_sigs[i] != 0; ++i) if (sigaction(message_progress_sigs[i], &sa, NULL)) message_signal_handler(); #endif return; } extern void message_verbosity_increase(void) { if (verbosity < V_DEBUG) ++verbosity; return; } extern void message_verbosity_decrease(void) { if (verbosity > V_SILENT) --verbosity; return; } extern enum message_verbosity message_verbosity_get(void) { return verbosity; } extern void message_set_files(unsigned int files) { files_total = files; return; } /// Prints the name of the current file if it hasn't been printed already, /// except if we are processing exactly one stream from stdin to stdout. /// I think it looks nicer to not print "(stdin)" when --verbose is used /// in a pipe and no other files are processed. static void print_filename(void) { if (!opt_robot && (files_total != 1 || filename != stdin_filename)) { signals_block(); FILE *file = opt_mode == MODE_LIST ? stdout : stderr; // If a file was already processed, put an empty line // before the next filename to improve readability. if (first_filename_printed) fputc('\n', file); first_filename_printed = true; current_filename_printed = true; // If we don't know how many files there will be due // to usage of --files or --files0. if (files_total == 0) fprintf(file, "%s (%u)\n", filename, files_pos); else fprintf(file, "%s (%u/%u)\n", filename, files_pos, files_total); signals_unblock(); } return; } extern void message_filename(const char *src_name) { // Start numbering the files starting from one. ++files_pos; filename = src_name; if (verbosity >= V_VERBOSE && (progress_automatic || opt_mode == MODE_LIST)) print_filename(); else current_filename_printed = false; return; } extern void message_progress_start(lzma_stream *strm, bool is_passthru, uint64_t in_size) { // Store the pointer to the lzma_stream used to do the coding. 
// It is needed to find out the position in the stream. progress_strm = strm; progress_is_from_passthru = is_passthru; // Store the expected size of the file. If we aren't printing any // statistics, then is will be unused. But since it is possible // that the user sends us a signal to show statistics, we need // to have it available anyway. expected_in_size = in_size; // Indicate that progress info may need to be printed before // printing error messages. progress_started = true; // If progress indicator is wanted, print the filename and possibly // the file count now. if (verbosity >= V_VERBOSE && progress_automatic) { // Start the timer to display the first progress message // after one second. An alternative would be to show the // first message almost immediately, but delaying by one // second looks better to me, since extremely early // progress info is pretty much useless. #ifdef SIGALRM // First disable a possibly existing alarm. alarm(0); progress_needs_updating = false; alarm(1); #else progress_needs_updating = true; progress_next_update = 1000; #endif } return; } /// Make the string indicating completion percentage. static const char * progress_percentage(uint64_t in_pos) { // If the size of the input file is unknown or the size told us is // clearly wrong since we have processed more data than the alleged // size of the file, show a static string indicating that we have // no idea of the completion percentage. if (expected_in_size == 0 || in_pos > expected_in_size) return "--- %"; // Never show 100.0 % before we actually are finished. double percentage = (double)(in_pos) / (double)(expected_in_size) * 99.9; // Use big enough buffer to hold e.g. a multibyte decimal point. static char buf[16]; snprintf(buf, sizeof(buf), "%.1f %%", percentage); return buf; } /// Make the string containing the amount of input processed, amount of /// output produced, and the compression ratio. 
static const char * progress_sizes(uint64_t compressed_pos, uint64_t uncompressed_pos, bool final) { // Use big enough buffer to hold e.g. a multibyte thousand separators. static char buf[128]; char *pos = buf; size_t left = sizeof(buf); // Print the sizes. If this the final message, use more reasonable // units than MiB if the file was small. const enum nicestr_unit unit_min = final ? NICESTR_B : NICESTR_MIB; my_snprintf(&pos, &left, "%s / %s", uint64_to_nicestr(compressed_pos, unit_min, NICESTR_TIB, false, 0), uint64_to_nicestr(uncompressed_pos, unit_min, NICESTR_TIB, false, 1)); // Avoid division by zero. If we cannot calculate the ratio, set // it to some nice number greater than 10.0 so that it gets caught // in the next if-clause. const double ratio = uncompressed_pos > 0 ? (double)(compressed_pos) / (double)(uncompressed_pos) : 16.0; // If the ratio is very bad, just indicate that it is greater than // 9.999. This way the length of the ratio field stays fixed. if (ratio > 9.999) snprintf(pos, left, " > %.3f", 9.999); else snprintf(pos, left, " = %.3f", ratio); return buf; } /// Make the string containing the processing speed of uncompressed data. static const char * progress_speed(uint64_t uncompressed_pos, uint64_t elapsed) { // Don't print the speed immediately, since the early values look // somewhat random. if (elapsed < 3000) return ""; // The first character of KiB/s, MiB/s, or GiB/s: static const char unit[] = { 'K', 'M', 'G' }; size_t unit_index = 0; // Calculate the speed as KiB/s. double speed = (double)(uncompressed_pos) / ((double)(elapsed) * (1024.0 / 1000.0)); // Adjust the unit of the speed if needed. while (speed > 999.0) { speed /= 1024.0; if (++unit_index == ARRAY_SIZE(unit)) return ""; // Way too fast ;-) } // Use decimal point only if the number is small. Examples: // - 0.1 KiB/s // - 9.9 KiB/s // - 99 KiB/s // - 999 KiB/s // Use big enough buffer to hold e.g. a multibyte decimal point. 
static char buf[16]; snprintf(buf, sizeof(buf), "%.*f %ciB/s", speed > 9.9 ? 0 : 1, speed, unit[unit_index]); return buf; } /// Make a string indicating elapsed time. The format is either /// M:SS or H:MM:SS depending on if the time is an hour or more. static const char * progress_time(uint64_t mseconds) { // 9999 hours = 416 days static char buf[sizeof("9999:59:59")]; // 32-bit variable is enough for elapsed time (136 years). uint32_t seconds = (uint32_t)(mseconds / 1000); // Don't show anything if the time is zero or ridiculously big. if (seconds == 0 || seconds > ((9999 * 60) + 59) * 60 + 59) return ""; uint32_t minutes = seconds / 60; seconds %= 60; if (minutes >= 60) { const uint32_t hours = minutes / 60; minutes %= 60; snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32 ":%02" PRIu32, hours, minutes, seconds); } else { snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32, minutes, seconds); } return buf; } /// Return a string containing estimated remaining time when /// reasonably possible. static const char * progress_remaining(uint64_t in_pos, uint64_t elapsed) { // Don't show the estimated remaining time when it wouldn't // make sense: // - Input size is unknown. // - Input has grown bigger since we started (de)compressing. // - We haven't processed much data yet, so estimate would be // too inaccurate. // - Only a few seconds has passed since we started (de)compressing, // so estimate would be too inaccurate. if (expected_in_size == 0 || in_pos > expected_in_size || in_pos < (UINT64_C(1) << 19) || elapsed < 8000) return ""; // Calculate the estimate. Don't give an estimate of zero seconds, // since it is possible that all the input has been already passed // to the library, but there is still quite a bit of output pending. 
uint32_t remaining = (uint32_t)((double)(expected_in_size - in_pos) * ((double)(elapsed) / 1000.0) / (double)(in_pos)); if (remaining < 1) remaining = 1; static char buf[sizeof("9 h 55 min")]; // Select appropriate precision for the estimated remaining time. if (remaining <= 10) { // A maximum of 10 seconds remaining. // Show the number of seconds as is. snprintf(buf, sizeof(buf), "%" PRIu32 " s", remaining); } else if (remaining <= 50) { // A maximum of 50 seconds remaining. // Round up to the next multiple of five seconds. remaining = (remaining + 4) / 5 * 5; snprintf(buf, sizeof(buf), "%" PRIu32 " s", remaining); } else if (remaining <= 590) { // A maximum of 9 minutes and 50 seconds remaining. // Round up to the next multiple of ten seconds. remaining = (remaining + 9) / 10 * 10; snprintf(buf, sizeof(buf), "%" PRIu32 " min %" PRIu32 " s", remaining / 60, remaining % 60); } else if (remaining <= 59 * 60) { // A maximum of 59 minutes remaining. // Round up to the next multiple of a minute. remaining = (remaining + 59) / 60; snprintf(buf, sizeof(buf), "%" PRIu32 " min", remaining); } else if (remaining <= 9 * 3600 + 50 * 60) { // A maximum of 9 hours and 50 minutes left. // Round up to the next multiple of ten minutes. remaining = (remaining + 599) / 600 * 10; snprintf(buf, sizeof(buf), "%" PRIu32 " h %" PRIu32 " min", remaining / 60, remaining % 60); } else if (remaining <= 23 * 3600) { // A maximum of 23 hours remaining. // Round up to the next multiple of an hour. remaining = (remaining + 3599) / 3600; snprintf(buf, sizeof(buf), "%" PRIu32 " h", remaining); } else if (remaining <= 9 * 24 * 3600 + 23 * 3600) { // A maximum of 9 days and 23 hours remaining. // Round up to the next multiple of an hour. remaining = (remaining + 3599) / 3600; snprintf(buf, sizeof(buf), "%" PRIu32 " d %" PRIu32 " h", remaining / 24, remaining % 24); } else if (remaining <= 999 * 24 * 3600) { // A maximum of 999 days remaining. ;-) // Round up to the next multiple of a day. 
remaining = (remaining + 24 * 3600 - 1) / (24 * 3600); snprintf(buf, sizeof(buf), "%" PRIu32 " d", remaining); } else { // The estimated remaining time is too big. Don't show it. return ""; } return buf; } /// Get how much uncompressed and compressed data has been processed. static void progress_pos(uint64_t *in_pos, uint64_t *compressed_pos, uint64_t *uncompressed_pos) { uint64_t out_pos; if (progress_is_from_passthru) { // In passthru mode the progress info is in total_in/out but // the *progress_strm itself isn't initialized and thus we // cannot use lzma_get_progress(). *in_pos = progress_strm->total_in; out_pos = progress_strm->total_out; } else { lzma_get_progress(progress_strm, in_pos, &out_pos); } // It cannot have processed more input than it has been given. assert(*in_pos <= progress_strm->total_in); // It cannot have produced more output than it claims to have ready. assert(out_pos >= progress_strm->total_out); if (opt_mode == MODE_COMPRESS) { *compressed_pos = out_pos; *uncompressed_pos = *in_pos; } else { *compressed_pos = *in_pos; *uncompressed_pos = out_pos; } return; } extern void message_progress_update(void) { if (!progress_needs_updating) return; // Calculate how long we have been processing this file. const uint64_t elapsed = mytime_get_elapsed(); #ifndef SIGALRM if (progress_next_update > elapsed) return; progress_next_update = elapsed + 1000; #endif // Get our current position in the stream. uint64_t in_pos; uint64_t compressed_pos; uint64_t uncompressed_pos; progress_pos(&in_pos, &compressed_pos, &uncompressed_pos); // Block signals so that fprintf() doesn't get interrupted. signals_block(); // Print the filename if it hasn't been printed yet. if (!current_filename_printed) print_filename(); // Print the actual progress message. The idea is that there is at // least three spaces between the fields in typical situations, but // even in rare situations there is at least one space. 
const char *cols[5] = { progress_percentage(in_pos), progress_sizes(compressed_pos, uncompressed_pos, false), progress_speed(uncompressed_pos, elapsed), progress_time(elapsed), progress_remaining(in_pos, elapsed), }; fprintf(stderr, "\r %*s %*s %*s %10s %10s\r", tuklib_mbstr_fw(cols[0], 6), cols[0], tuklib_mbstr_fw(cols[1], 35), cols[1], tuklib_mbstr_fw(cols[2], 9), cols[2], cols[3], cols[4]); #ifdef SIGALRM // Updating the progress info was finished. Reset // progress_needs_updating to wait for the next SIGALRM. // // NOTE: This has to be done before alarm(1) or with (very) bad // luck we could be setting this to false after the alarm has already // been triggered. progress_needs_updating = false; if (verbosity >= V_VERBOSE && progress_automatic) { // Mark that the progress indicator is active, so if an error // occurs, the error message gets printed cleanly. progress_active = true; // Restart the timer so that progress_needs_updating gets // set to true after about one second. alarm(1); } else { // The progress message was printed because user had sent us // SIGALRM. In this case, each progress message is printed // on its own line. fputc('\n', stderr); } #else // When SIGALRM isn't supported and we get here, it's always due to // automatic progress update. We set progress_active here too like // described above. assert(verbosity >= V_VERBOSE); assert(progress_automatic); progress_active = true; #endif signals_unblock(); return; } static void progress_flush(bool finished) { if (!progress_started || verbosity < V_VERBOSE) return; uint64_t in_pos; uint64_t compressed_pos; uint64_t uncompressed_pos; progress_pos(&in_pos, &compressed_pos, &uncompressed_pos); // Avoid printing intermediate progress info if some error occurs // in the beginning of the stream. (If something goes wrong later in // the stream, it is sometimes useful to tell the user where the // error approximately occurred, especially if the error occurs // after a time-consuming operation.) 
if (!finished && !progress_active && (compressed_pos == 0 || uncompressed_pos == 0)) return; progress_active = false; const uint64_t elapsed = mytime_get_elapsed(); signals_block(); // When using the auto-updating progress indicator, the final // statistics are printed in the same format as the progress // indicator itself. if (progress_automatic) { const char *cols[5] = { finished ? "100 %" : progress_percentage(in_pos), progress_sizes(compressed_pos, uncompressed_pos, true), progress_speed(uncompressed_pos, elapsed), progress_time(elapsed), finished ? "" : progress_remaining(in_pos, elapsed), }; fprintf(stderr, "\r %*s %*s %*s %10s %10s\n", tuklib_mbstr_fw(cols[0], 6), cols[0], tuklib_mbstr_fw(cols[1], 35), cols[1], tuklib_mbstr_fw(cols[2], 9), cols[2], cols[3], cols[4]); } else { // The filename is always printed. - fprintf(stderr, "%s: ", filename); + fprintf(stderr, _("%s: "), filename); // Percentage is printed only if we didn't finish yet. if (!finished) { // Don't print the percentage when it isn't known // (starts with a dash). const char *percentage = progress_percentage(in_pos); if (percentage[0] != '-') fprintf(stderr, "%s, ", percentage); } // Size information is always printed. fprintf(stderr, "%s", progress_sizes( compressed_pos, uncompressed_pos, true)); // The speed and elapsed time aren't always shown. const char *speed = progress_speed(uncompressed_pos, elapsed); if (speed[0] != '\0') fprintf(stderr, ", %s", speed); const char *elapsed_str = progress_time(elapsed); if (elapsed_str[0] != '\0') fprintf(stderr, ", %s", elapsed_str); fputc('\n', stderr); } signals_unblock(); return; } extern void message_progress_end(bool success) { assert(progress_started); progress_flush(success); progress_started = false; return; } static void vmessage(enum message_verbosity v, const char *fmt, va_list ap) { if (v <= verbosity) { signals_block(); progress_flush(false); // TRANSLATORS: This is the program name in the beginning // of the line in messages. 
Usually it becomes "xz: ". // This is a translatable string because French needs // a space before a colon. fprintf(stderr, _("%s: "), progname); #ifdef __clang__ # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wformat-nonliteral" #endif vfprintf(stderr, fmt, ap); #ifdef __clang__ # pragma GCC diagnostic pop #endif fputc('\n', stderr); signals_unblock(); } return; } extern void message(enum message_verbosity v, const char *fmt, ...) { va_list ap; va_start(ap, fmt); vmessage(v, fmt, ap); va_end(ap); return; } extern void message_warning(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vmessage(V_WARNING, fmt, ap); va_end(ap); set_exit_status(E_WARNING); return; } extern void message_error(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vmessage(V_ERROR, fmt, ap); va_end(ap); set_exit_status(E_ERROR); return; } extern void message_fatal(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vmessage(V_ERROR, fmt, ap); va_end(ap); tuklib_exit(E_ERROR, E_ERROR, false); } extern void message_bug(void) { message_fatal(_("Internal error (bug)")); } extern void message_signal_handler(void) { message_fatal(_("Cannot establish signal handlers")); } extern const char * message_strm(lzma_ret code) { switch (code) { case LZMA_NO_CHECK: return _("No integrity check; not verifying file integrity"); case LZMA_UNSUPPORTED_CHECK: return _("Unsupported type of integrity check; " "not verifying file integrity"); case LZMA_MEM_ERROR: return strerror(ENOMEM); case LZMA_MEMLIMIT_ERROR: return _("Memory usage limit reached"); case LZMA_FORMAT_ERROR: return _("File format not recognized"); case LZMA_OPTIONS_ERROR: return _("Unsupported options"); case LZMA_DATA_ERROR: return _("Compressed data is corrupt"); case LZMA_BUF_ERROR: return _("Unexpected end of input"); case LZMA_OK: case LZMA_STREAM_END: case LZMA_GET_CHECK: case LZMA_PROG_ERROR: case LZMA_SEEK_NEEDED: case LZMA_RET_INTERNAL1: case LZMA_RET_INTERNAL2: case LZMA_RET_INTERNAL3: case LZMA_RET_INTERNAL4: 
case LZMA_RET_INTERNAL5: case LZMA_RET_INTERNAL6: case LZMA_RET_INTERNAL7: case LZMA_RET_INTERNAL8: // Without "default", compiler will warn if new constants // are added to lzma_ret, it is not too easy to forget to // add the new constants to this function. break; } return _("Internal error (bug)"); } extern void message_mem_needed(enum message_verbosity v, uint64_t memusage) { if (v > verbosity) return; // Convert memusage to MiB, rounding up to the next full MiB. // This way the user can always use the displayed usage as // the new memory usage limit. (If we rounded to the nearest, // the user might need to +1 MiB to get high enough limit.) memusage = round_up_to_mib(memusage); uint64_t memlimit = hardware_memlimit_get(opt_mode); // Handle the case when there is no memory usage limit. // This way we don't print a weird message with a huge number. if (memlimit == UINT64_MAX) { message(v, _("%s MiB of memory is required. " "The limiter is disabled."), uint64_to_str(memusage, 0)); return; } // With US-ASCII: // 2^64 with thousand separators + " MiB" suffix + '\0' = 26 + 4 + 1 // But there may be multibyte chars so reserve enough space. char memlimitstr[128]; // Show the memory usage limit as MiB unless it is less than 1 MiB. // This way it's easy to notice errors where one has typed // --memory=123 instead of --memory=123MiB. if (memlimit < (UINT32_C(1) << 20)) { snprintf(memlimitstr, sizeof(memlimitstr), "%s B", uint64_to_str(memlimit, 1)); } else { // Round up just like with memusage. If this function is // called for informational purposes (to just show the // current usage and limit), we should never show that // the usage is higher than the limit, which would give // a false impression that the memory usage limit isn't // properly enforced. snprintf(memlimitstr, sizeof(memlimitstr), "%s MiB", uint64_to_str(round_up_to_mib(memlimit), 1)); } message(v, _("%s MiB of memory is required. 
The limit is %s."), uint64_to_str(memusage, 0), memlimitstr); return; } extern void message_filters_show(enum message_verbosity v, const lzma_filter *filters) { if (v > verbosity) return; char *buf; const lzma_ret ret = lzma_str_from_filters(&buf, filters, LZMA_STR_ENCODER | LZMA_STR_GETOPT_LONG, NULL); if (ret != LZMA_OK) message_fatal("%s", message_strm(ret)); fprintf(stderr, _("%s: Filter chain: %s\n"), progname, buf); free(buf); return; } extern void message_try_help(void) { // Print this with V_WARNING instead of V_ERROR to prevent it from // showing up when --quiet has been specified. message(V_WARNING, _("Try `%s --help' for more information."), progname); return; } extern void message_version(void) { // It is possible that liblzma version is different than the command // line tool version, so print both. if (opt_robot) { printf("XZ_VERSION=%" PRIu32 "\nLIBLZMA_VERSION=%" PRIu32 "\n", LZMA_VERSION, lzma_version_number()); } else { printf("xz (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"); printf("liblzma %s\n", lzma_version_string()); } tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); } extern void message_help(bool long_help) { printf(_("Usage: %s [OPTION]... [FILE]...\n" "Compress or decompress FILEs in the .xz format.\n\n"), progname); // NOTE: The short help doesn't currently have options that // take arguments. 
if (long_help) puts(_("Mandatory arguments to long options are mandatory " "for short options too.\n")); if (long_help) puts(_(" Operation mode:\n")); puts(_( " -z, --compress force compression\n" " -d, --decompress force decompression\n" " -t, --test test compressed file integrity\n" " -l, --list list information about .xz files")); if (long_help) puts(_("\n Operation modifiers:\n")); puts(_( " -k, --keep keep (don't delete) input files\n" " -f, --force force overwrite of output file and (de)compress links\n" " -c, --stdout write to standard output and don't delete input files")); // NOTE: --to-stdout isn't included above because it's not // the recommended spelling. It was copied from gzip but other // compressors with gzip-like syntax don't support it. if (long_help) { puts(_( " --single-stream decompress only the first stream, and silently\n" " ignore possible remaining input data")); puts(_( " --no-sparse do not create sparse files when decompressing\n" " -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n" " --files[=FILE] read filenames to process from FILE; if FILE is\n" " omitted, filenames are read from the standard input;\n" " filenames must be terminated with the newline character\n" " --files0[=FILE] like --files but use the null character as terminator")); } if (long_help) { puts(_("\n Basic file format and compression options:\n")); puts(_( " -F, --format=FMT file format to encode or decode; possible values are\n" " `auto' (default), `xz', `lzma', `lzip', and `raw'\n" " -C, --check=CHECK integrity check type: `none' (use with caution),\n" " `crc32', `crc64' (default), or `sha256'")); puts(_( " --ignore-check don't verify the integrity check when decompressing")); } puts(_( " -0 ... 
-9 compression preset; default is 6; take compressor *and*\n" " decompressor memory usage into account before using 7-9!")); puts(_( " -e, --extreme try to improve compression ratio by using more CPU time;\n" " does not affect decompressor memory requirements")); puts(_( " -T, --threads=NUM use at most NUM threads; the default is 1; set to 0\n" " to use as many threads as there are processor cores")); if (long_help) { puts(_( " --block-size=SIZE\n" " start a new .xz block after every SIZE bytes of input;\n" " use this to set the block size for threaded compression")); puts(_( " --block-list=SIZES\n" " start a new .xz block after the given comma-separated\n" " intervals of uncompressed data")); puts(_( " --flush-timeout=TIMEOUT\n" " when compressing, if more than TIMEOUT milliseconds has\n" " passed since the previous flush and reading more input\n" " would block, all pending data is flushed out" )); puts(_( // xgettext:no-c-format " --memlimit-compress=LIMIT\n" " --memlimit-decompress=LIMIT\n" " --memlimit-mt-decompress=LIMIT\n" " -M, --memlimit=LIMIT\n" " set memory usage limit for compression, decompression,\n" " threaded decompression, or all of these; LIMIT is in\n" " bytes, % of RAM, or 0 for defaults")); puts(_( " --no-adjust if compression settings exceed the memory usage limit,\n" " give an error instead of adjusting the settings downwards")); } if (long_help) { puts(_( "\n Custom filter chain for compression (alternative for using presets):")); #if defined(HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) \ || defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) // TRANSLATORS: The word "literal" in "literal context bits" // means how many "context bits" to use when encoding // literals. A literal is a single 8-bit byte. It doesn't // mean "literally" here. 
puts(_( "\n" " --lzma1[=OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero or\n" " --lzma2[=OPTS] more of the following options (valid values; default):\n" " preset=PRE reset options to a preset (0-9[e])\n" " dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n" " lc=NUM number of literal context bits (0-4; 3)\n" " lp=NUM number of literal position bits (0-4; 0)\n" " pb=NUM number of position bits (0-4; 2)\n" " mode=MODE compression mode (fast, normal; normal)\n" " nice=NUM nice length of a match (2-273; 64)\n" " mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n" " depth=NUM maximum search depth; 0=automatic (default)")); #endif puts(_( "\n" " --x86[=OPTS] x86 BCJ filter (32-bit and 64-bit)\n" " --arm[=OPTS] ARM BCJ filter\n" " --armthumb[=OPTS] ARM-Thumb BCJ filter\n" " --arm64[=OPTS] ARM64 BCJ filter\n" " --powerpc[=OPTS] PowerPC BCJ filter (big endian only)\n" " --ia64[=OPTS] IA-64 (Itanium) BCJ filter\n" " --sparc[=OPTS] SPARC BCJ filter\n" " Valid OPTS for all BCJ filters:\n" " start=NUM start offset for conversions (default=0)")); #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) puts(_( "\n" " --delta[=OPTS] Delta filter; valid OPTS (valid values; default):\n" " dist=NUM distance between bytes being subtracted\n" " from each other (1-256; 1)")); #endif } if (long_help) puts(_("\n Other options:\n")); puts(_( " -q, --quiet suppress warnings; specify twice to suppress errors too\n" " -v, --verbose be verbose; specify twice for even more verbose")); if (long_help) { puts(_( " -Q, --no-warn make warnings not affect the exit status")); puts(_( " --robot use machine-parsable messages (useful for scripts)")); puts(""); puts(_( " --info-memory display the total amount of RAM and the currently active\n" " memory usage limits, and exit")); puts(_( " -h, --help display the short help (lists only the basic options)\n" " -H, --long-help display this long help and exit")); } else { puts(_( " -h, --help display this short help and exit\n" " -H, 
--long-help display the long help (lists also the advanced options)")); } puts(_( " -V, --version display the version number and exit")); puts(_("\nWith no FILE, or when FILE is -, read standard input.\n")); // TRANSLATORS: This message indicates the bug reporting address // for this package. Please add _another line_ saying // "Report translation bugs to <...>\n" with the email or WWW // address for translation bugs. Thanks. printf(_("Report bugs to <%s> (in English or Finnish).\n"), PACKAGE_BUGREPORT); printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); #if LZMA_VERSION_STABILITY != LZMA_VERSION_STABILITY_STABLE puts(_( "THIS IS A DEVELOPMENT VERSION NOT INTENDED FOR PRODUCTION USE.")); #endif tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); } diff --git a/contrib/xz/src/xz/xz.1 b/contrib/xz/src/xz/xz.1 index aefb79f2fdc0..8e85a17e920e 100644 --- a/contrib/xz/src/xz/xz.1 +++ b/contrib/xz/src/xz/xz.1 @@ -1,3020 +1,3021 @@ '\" t .\" -.\" Author: Lasse Collin +.\" Authors: Lasse Collin +.\" Jia Tan .\" .\" This file has been put into the public domain. .\" You can do whatever you want with this file. .\" -.TH XZ 1 "2022-12-01" "Tukaani" "XZ Utils" +.TH XZ 1 "2023-07-17" "Tukaani" "XZ Utils" . .SH NAME xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files . .SH SYNOPSIS .B xz .RI [ option... ] .RI [ file... ] . .SH COMMAND ALIASES .B unxz is equivalent to .BR "xz \-\-decompress" . .br .B xzcat is equivalent to .BR "xz \-\-decompress \-\-stdout" . .br .B lzma is equivalent to .BR "xz \-\-format=lzma" . .br .B unlzma is equivalent to .BR "xz \-\-format=lzma \-\-decompress" . .br .B lzcat is equivalent to .BR "xz \-\-format=lzma \-\-decompress \-\-stdout" . .PP When writing scripts that need to decompress files, it is recommended to always use the name .B xz with appropriate arguments .RB ( "xz \-d" or .BR "xz \-dc" ) instead of the names .B unxz and .BR xzcat . . 
.SH DESCRIPTION .B xz is a general-purpose data compression tool with command line syntax similar to .BR gzip (1) and .BR bzip2 (1). The native file format is the .B .xz format, but the legacy .B .lzma format used by LZMA Utils and raw compressed streams with no container format headers are also supported. In addition, decompression of the .B .lz format used by .B lzip is supported. .PP .B xz compresses or decompresses each .I file according to the selected operation mode. If no .I files are given or .I file is .BR \- , .B xz reads from standard input and writes the processed data to standard output. .B xz will refuse (display an error and skip the .IR file ) to write compressed data to standard output if it is a terminal. Similarly, .B xz will refuse to read compressed data from standard input if it is a terminal. .PP Unless .B \-\-stdout is specified, .I files other than .B \- are written to a new file whose name is derived from the source .I file name: .IP \(bu 3 When compressing, the suffix of the target file format .RB ( .xz or .BR .lzma ) is appended to the source filename to get the target filename. .IP \(bu 3 When decompressing, the .BR .xz , .BR .lzma , or .B .lz suffix is removed from the filename to get the target filename. .B xz also recognizes the suffixes .B .txz and .BR .tlz , and replaces them with the .B .tar suffix. .PP If the target file already exists, an error is displayed and the .I file is skipped. .PP Unless writing to standard output, .B xz will display a warning and skip the .I file if any of the following applies: .IP \(bu 3 .I File is not a regular file. Symbolic links are not followed, and thus they are not considered to be regular files. .IP \(bu 3 .I File has more than one hard link. .IP \(bu 3 .I File has setuid, setgid, or sticky bit set. 
.IP \(bu 3 The operation mode is set to compress and the .I file already has a suffix of the target file format .RB ( .xz or .B .txz when compressing to the .B .xz format, and .B .lzma or .B .tlz when compressing to the .B .lzma format). .IP \(bu 3 The operation mode is set to decompress and the .I file doesn't have a suffix of any of the supported file formats .RB ( .xz , .BR .txz , .BR .lzma , .BR .tlz , or .BR .lz ). .PP After successfully compressing or decompressing the .IR file , .B xz copies the owner, group, permissions, access time, and modification time from the source .I file to the target file. If copying the group fails, the permissions are modified so that the target file doesn't become accessible to users who didn't have permission to access the source .IR file . .B xz doesn't support copying other metadata like access control lists or extended attributes yet. .PP Once the target file has been successfully closed, the source .I file is removed unless .B \-\-keep was specified. The source .I file is never removed if the output is written to standard output or if an error occurs. .PP Sending .B SIGINFO or .B SIGUSR1 to the .B xz process makes it print progress information to standard error. This has only limited use since when standard error is a terminal, using .B \-\-verbose will display an automatically updating progress indicator. . .SS "Memory usage" The memory usage of .B xz varies from a few hundred kilobytes to several gigabytes depending on the compression settings. The settings used when compressing a file determine the memory requirements of the decompressor. Typically the decompressor needs 5\ % to 20\ % of the amount of memory that the compressor needed when creating the file. For example, decompressing a file created with .B xz \-9 currently requires 65\ MiB of memory. Still, it is possible to have .B .xz files that require several gigabytes of memory to decompress. 
.PP Especially users of older systems may find the possibility of very large memory usage annoying. To prevent uncomfortable surprises, .B xz has a built-in memory usage limiter, which is disabled by default. While some operating systems provide ways to limit the memory usage of processes, relying on it wasn't deemed to be flexible enough (for example, using .BR ulimit (1) to limit virtual memory tends to cripple .BR mmap (2)). .PP The memory usage limiter can be enabled with the command line option \fB\-\-memlimit=\fIlimit\fR. Often it is more convenient to enable the limiter by default by setting the environment variable .BR XZ_DEFAULTS , for example, .BR XZ_DEFAULTS=\-\-memlimit=150MiB . It is possible to set the limits separately for compression and decompression by using .BI \-\-memlimit\-compress= limit and \fB\-\-memlimit\-decompress=\fIlimit\fR. Using these two options outside .B XZ_DEFAULTS is rarely useful because a single run of .B xz cannot do both compression and decompression and .BI \-\-memlimit= limit (or .B \-M .IR limit ) is shorter to type on the command line. .PP If the specified memory usage limit is exceeded when decompressing, .B xz will display an error and decompressing the file will fail. If the limit is exceeded when compressing, .B xz will try to scale the settings down so that the limit is no longer exceeded (except when using .B \-\-format=raw or .BR \-\-no\-adjust ). This way the operation won't fail unless the limit is very small. The scaling of the settings is done in steps that don't match the compression level presets, for example, if the limit is only slightly less than the amount required for .BR "xz \-9" , the settings will be scaled down only a little, not all the way down to .BR "xz \-8" . . .SS "Concatenation and padding with .xz files" It is possible to concatenate .B .xz files as is. .B xz will decompress such files as if they were a single .B .xz file. 
.PP It is possible to insert padding between the concatenated parts or after the last part. The padding must consist of null bytes and the size of the padding must be a multiple of four bytes. This can be useful, for example, if the .B .xz file is stored on a medium that measures file sizes in 512-byte blocks. .PP Concatenation and padding are not allowed with .B .lzma files or raw streams. . .SH OPTIONS . .SS "Integer suffixes and special values" In most places where an integer argument is expected, an optional suffix is supported to easily indicate large integers. There must be no space between the integer and the suffix. .TP .B KiB Multiply the integer by 1,024 (2^10). .BR Ki , .BR k , .BR kB , .BR K , and .B KB are accepted as synonyms for .BR KiB . .TP .B MiB Multiply the integer by 1,048,576 (2^20). .BR Mi , .BR m , .BR M , and .B MB are accepted as synonyms for .BR MiB . .TP .B GiB Multiply the integer by 1,073,741,824 (2^30). .BR Gi , .BR g , .BR G , and .B GB are accepted as synonyms for .BR GiB . .PP The special value .B max can be used to indicate the maximum integer value supported by the option. . .SS "Operation mode" If multiple operation mode options are given, the last one takes effect. .TP .BR \-z ", " \-\-compress Compress. This is the default operation mode when no operation mode option is specified and no other operation mode is implied from the command name (for example, .B unxz implies .BR \-\-decompress ). .TP .BR \-d ", " \-\-decompress ", " \-\-uncompress Decompress. .TP .BR \-t ", " \-\-test Test the integrity of compressed .IR files . This option is equivalent to .B "\-\-decompress \-\-stdout" except that the decompressed data is discarded instead of being written to standard output. No files are created or removed. .TP .BR \-l ", " \-\-list Print information about compressed .IR files . No uncompressed output is produced, and no files are created or removed. 
In list mode, the program cannot read the compressed data from standard input or from other unseekable sources. .IP "" The default listing shows basic information about .IR files , one file per line. To get more detailed information, use also the .B \-\-verbose option. For even more information, use .B \-\-verbose twice, but note that this may be slow, because getting all the extra information requires many seeks. The width of verbose output exceeds 80 characters, so piping the output to, for example, .B "less\ \-S" may be convenient if the terminal isn't wide enough. .IP "" The exact output may vary between .B xz versions and different locales. For machine-readable output, .B \-\-robot \-\-list should be used. . .SS "Operation modifiers" .TP .BR \-k ", " \-\-keep Don't delete the input files. .IP "" Since .B xz 5.2.6, this option also makes .B xz compress or decompress even if the input is a symbolic link to a regular file, has more than one hard link, or has the setuid, setgid, or sticky bit set. The setuid, setgid, and sticky bits are not copied to the target file. In earlier versions this was only done with .BR \-\-force . .TP .BR \-f ", " \-\-force This option has several effects: .RS .IP \(bu 3 If the target file already exists, delete it before compressing or decompressing. .IP \(bu 3 Compress or decompress even if the input is a symbolic link to a regular file, has more than one hard link, or has the setuid, setgid, or sticky bit set. The setuid, setgid, and sticky bits are not copied to the target file. .IP \(bu 3 When used with .B \-\-decompress .B \-\-stdout and .B xz cannot recognize the type of the source file, copy the source file as is to standard output. This allows .B xzcat .B \-\-force to be used like .BR cat (1) for files that have not been compressed with .BR xz . Note that in future, .B xz might support new compressed file formats, which may make .B xz decompress more types of files instead of copying them as is to standard output. 
.BI \-\-format= format can be used to restrict .B xz to decompress only a single file format. .RE .TP .BR \-c ", " \-\-stdout ", " \-\-to\-stdout Write the compressed or decompressed data to standard output instead of a file. This implies .BR \-\-keep . .TP .B \-\-single\-stream Decompress only the first .B .xz stream, and silently ignore possible remaining input data following the stream. Normally such trailing garbage makes .B xz display an error. .IP "" .B xz never decompresses more than one stream from .B .lzma files or raw streams, but this option still makes .B xz ignore the possible trailing data after the .B .lzma file or raw stream. .IP "" This option has no effect if the operation mode is not .B \-\-decompress or .BR \-\-test . .TP .B \-\-no\-sparse Disable creation of sparse files. By default, if decompressing into a regular file, .B xz tries to make the file sparse if the decompressed data contains long sequences of binary zeros. It also works when writing to standard output as long as standard output is connected to a regular file and certain additional conditions are met to make it safe. Creating sparse files may save disk space and speed up the decompression by reducing the amount of disk I/O. .TP \fB\-S\fR \fI.suf\fR, \fB\-\-suffix=\fI.suf When compressing, use .I .suf as the suffix for the target file instead of .B .xz or .BR .lzma . If not writing to standard output and the source file already has the suffix .IR .suf , a warning is displayed and the file is skipped. .IP "" When decompressing, recognize files with the suffix .I .suf in addition to files with the .BR .xz , .BR .txz , .BR .lzma , .BR .tlz , or .B .lz suffix. If the source file has the suffix .IR .suf , the suffix is removed to get the target filename. .IP "" When compressing or decompressing raw streams .RB ( \-\-format=raw ), the suffix must always be specified unless writing to standard output, because there is no default suffix for raw streams. 
.TP \fB\-\-files\fR[\fB=\fIfile\fR] Read the filenames to process from .IR file ; if .I file is omitted, filenames are read from standard input. Filenames must be terminated with the newline character. A dash .RB ( \- ) is taken as a regular filename; it doesn't mean standard input. If filenames are given also as command line arguments, they are processed before the filenames read from .IR file . .TP \fB\-\-files0\fR[\fB=\fIfile\fR] This is identical to \fB\-\-files\fR[\fB=\fIfile\fR] except that each filename must be terminated with the null character. . .SS "Basic file format and compression options" .TP \fB\-F\fR \fIformat\fR, \fB\-\-format=\fIformat Specify the file .I format to compress or decompress: .RS .TP .B auto This is the default. When compressing, .B auto is equivalent to .BR xz . When decompressing, the format of the input file is automatically detected. Note that raw streams (created with .BR \-\-format=raw ) cannot be auto-detected. .TP .B xz Compress to the .B .xz file format, or accept only .B .xz files when decompressing. .TP .BR lzma ", " alone Compress to the legacy .B .lzma file format, or accept only .B .lzma files when decompressing. The alternative name .B alone is provided for backwards compatibility with LZMA Utils. .TP .B lzip Accept only .B .lz files when decompressing. Compression is not supported. .IP "" The .B .lz format version 0 and the unextended version 1 are supported. Version 0 files were produced by .B lzip 1.3 and older. Such files aren't common but may be found from file archives as a few source packages were released in this format. People might have old personal files in this format too. Decompression support for the format version 0 was removed in .B lzip 1.18. .IP "" .B lzip 1.4 and later create files in the format version 1. The sync flush marker extension to the format version 1 was added in .B lzip 1.6. This extension is rarely used and isn't supported by .B xz (diagnosed as corrupt input). 
.TP .B raw Compress or uncompress a raw stream (no headers). This is meant for advanced users only. To decode raw streams, you need to use .B \-\-format=raw and explicitly specify the filter chain, which normally would have been stored in the container headers. .RE .TP \fB\-C\fR \fIcheck\fR, \fB\-\-check=\fIcheck Specify the type of the integrity check. The check is calculated from the uncompressed data and stored in the .B .xz file. This option has an effect only when compressing into the .B .xz format; the .B .lzma format doesn't support integrity checks. The integrity check (if any) is verified when the .B .xz file is decompressed. .IP "" Supported .I check types: .RS .TP .B none Don't calculate an integrity check at all. This is usually a bad idea. This can be useful when integrity of the data is verified by other means anyway. .TP .B crc32 Calculate CRC32 using the polynomial from IEEE-802.3 (Ethernet). .TP .B crc64 Calculate CRC64 using the polynomial from ECMA-182. This is the default, since it is slightly better than CRC32 at detecting damaged files and the speed difference is negligible. .TP .B sha256 Calculate SHA-256. This is somewhat slower than CRC32 and CRC64. .RE .IP "" Integrity of the .B .xz headers is always verified with CRC32. It is not possible to change or disable it. .TP .B \-\-ignore\-check Don't verify the integrity check of the compressed data when decompressing. The CRC32 values in the .B .xz headers will still be verified normally. .IP "" .B "Do not use this option unless you know what you are doing." Possible reasons to use this option: .RS .IP \(bu 3 Trying to recover data from a corrupt .xz file. .IP \(bu 3 Speeding up decompression. This matters mostly with SHA-256 or with files that have compressed extremely well. It's recommended to not use this option for this purpose unless the file integrity is verified externally in some other way. .RE .TP .BR \-0 " ... " \-9 Select a compression preset level. The default is .BR \-6 .
If multiple preset levels are specified, the last one takes effect. If a custom filter chain was already specified, setting a compression preset level clears the custom filter chain. .IP "" The differences between the presets are more significant than with .BR gzip (1) and .BR bzip2 (1). The selected compression settings determine the memory requirements of the decompressor, thus using a too high preset level might make it painful to decompress the file on an old system with little RAM. Specifically, .B "it's not a good idea to blindly use \-9 for everything" like it often is with .BR gzip (1) and .BR bzip2 (1). .RS .TP .BR "\-0" " ... " "\-3" These are somewhat fast presets. .B \-0 is sometimes faster than .B "gzip \-9" while compressing much better. The higher ones often have speed comparable to .BR bzip2 (1) with comparable or better compression ratio, although the results depend a lot on the type of data being compressed. .TP .BR "\-4" " ... " "\-6" Good to very good compression while keeping decompressor memory usage reasonable even for old systems. .B \-6 is the default, which is usually a good choice for distributing files that need to be decompressible even on systems with only 16\ MiB RAM. .RB ( \-5e or .B \-6e may be worth considering too. See .BR \-\-extreme .) .TP .B "\-7 ... \-9" These are like .B \-6 but with higher compressor and decompressor memory requirements. These are useful only when compressing files bigger than 8\ MiB, 16\ MiB, and 32\ MiB, respectively. .RE .IP "" On the same hardware, the decompression speed is approximately a constant number of bytes of compressed data per second. In other words, the better the compression, the faster the decompression will usually be. This also means that the amount of uncompressed output produced per second can vary a lot. .IP "" The following table summarises the features of the presets: .RS .RS .PP .TS tab(;); c c c c c n n n n n. 
Preset;DictSize;CompCPU;CompMem;DecMem \-0;256 KiB;0;3 MiB;1 MiB \-1;1 MiB;1;9 MiB;2 MiB \-2;2 MiB;2;17 MiB;3 MiB \-3;4 MiB;3;32 MiB;5 MiB \-4;4 MiB;4;48 MiB;5 MiB \-5;8 MiB;5;94 MiB;9 MiB \-6;8 MiB;6;94 MiB;9 MiB \-7;16 MiB;6;186 MiB;17 MiB \-8;32 MiB;6;370 MiB;33 MiB \-9;64 MiB;6;674 MiB;65 MiB .TE .RE .RE .IP "" Column descriptions: .RS .IP \(bu 3 DictSize is the LZMA2 dictionary size. It is a waste of memory to use a dictionary bigger than the size of the uncompressed file. This is why it is good to avoid using the presets .BR \-7 " ... " \-9 when there's no real need for them. At .B \-6 and lower, the amount of memory wasted is usually low enough to not matter. .IP \(bu 3 CompCPU is a simplified representation of the LZMA2 settings that affect compression speed. The dictionary size affects speed too, so while CompCPU is the same for levels .BR \-6 " ... " \-9 , higher levels still tend to be a little slower. To get even slower and thus possibly better compression, see .BR \-\-extreme . .IP \(bu 3 CompMem contains the compressor memory requirements in the single-threaded mode. It may vary slightly between .B xz versions. Memory requirements of some of the future multithreaded modes may be dramatically higher than that of the single-threaded mode. .IP \(bu 3 DecMem contains the decompressor memory requirements. That is, the compression settings determine the memory requirements of the decompressor. The exact decompressor memory usage is slightly more than the LZMA2 dictionary size, but the values in the table have been rounded up to the next full MiB. .RE .TP .BR \-e ", " \-\-extreme Use a slower variant of the selected compression preset level .RB ( \-0 " ... " \-9 ) to hopefully get a little bit better compression ratio, but with bad luck this can also make it worse. Decompressor memory usage is not affected, but compressor memory usage increases a little at preset levels .BR \-0 " ... " \-3 .
.IP "" Since there are two presets with dictionary sizes 4\ MiB and 8\ MiB, the presets .B \-3e and .B \-5e use slightly faster settings (lower CompCPU) than .B \-4e and .BR \-6e , respectively. That way no two presets are identical. .RS .RS .PP .TS tab(;); c c c c c n n n n n. Preset;DictSize;CompCPU;CompMem;DecMem \-0e;256 KiB;8;4 MiB;1 MiB \-1e;1 MiB;8;13 MiB;2 MiB \-2e;2 MiB;8;25 MiB;3 MiB \-3e;4 MiB;7;48 MiB;5 MiB \-4e;4 MiB;8;48 MiB;5 MiB \-5e;8 MiB;7;94 MiB;9 MiB \-6e;8 MiB;8;94 MiB;9 MiB \-7e;16 MiB;8;186 MiB;17 MiB \-8e;32 MiB;8;370 MiB;33 MiB \-9e;64 MiB;8;674 MiB;65 MiB .TE .RE .RE .IP "" For example, there are a total of four presets that use 8\ MiB dictionary, whose order from the fastest to the slowest is .BR \-5 , .BR \-6 , .BR \-5e , and .BR \-6e . .TP .B \-\-fast .PD 0 .TP .B \-\-best .PD These are somewhat misleading aliases for .B \-0 and .BR \-9 , respectively. These are provided only for backwards compatibility with LZMA Utils. Avoid using these options. .TP .BI \-\-block\-size= size When compressing to the .B .xz format, split the input data into blocks of .I size bytes. The blocks are compressed independently from each other, which helps with multi-threading and makes limited random-access decompression possible. This option is typically used to override the default block size in multi-threaded mode, but this option can be used in single-threaded mode too. .IP "" In multi-threaded mode about three times .I size bytes will be allocated in each thread for buffering input and output. The default .I size is three times the LZMA2 dictionary size or 1 MiB, whichever is more. Typically a good value is 2\(en4 times the size of the LZMA2 dictionary or at least 1 MiB. Using .I size less than the LZMA2 dictionary size is a waste of RAM because then the LZMA2 dictionary buffer will never get fully used. The sizes of the blocks are stored in the block headers, which a future version of .B xz will use for multi-threaded decompression.
.IP "" In single-threaded mode no block splitting is done by default. Setting this option doesn't affect memory usage. No size information is stored in block headers, thus files created in single-threaded mode won't be identical to files created in multi-threaded mode. The lack of size information also means that a future version of .B xz won't be able to decompress the files in multi-threaded mode. .TP .BI \-\-block\-list= sizes When compressing to the .B .xz format, start a new block after the given intervals of uncompressed data. .IP "" The uncompressed .I sizes of the blocks are specified as a comma-separated list. Omitting a size (two or more consecutive commas) is a shorthand to use the size of the previous block. .IP "" If the input file is bigger than the sum of .IR sizes , the last value in .I sizes is repeated until the end of the file. A special value of .B 0 may be used as the last value to indicate that the rest of the file should be encoded as a single block. .IP "" If one specifies .I sizes that exceed the encoder's block size (either the default value in threaded mode or the value specified with \fB\-\-block\-size=\fIsize\fR), the encoder will create additional blocks while keeping the boundaries specified in .IR sizes . For example, if one specifies .B \-\-block\-size=10MiB .B \-\-block\-list=5MiB,10MiB,8MiB,12MiB,24MiB and the input file is 80 MiB, one will get 11 blocks: 5, 10, 8, 10, 2, 10, 10, 4, 10, 10, and 1 MiB. .IP "" In multi-threaded mode the sizes of the blocks are stored in the block headers. This isn't done in single-threaded mode, so the encoded output won't be identical to that of the multi-threaded mode. .TP .BI \-\-flush\-timeout= timeout When compressing, if more than .I timeout milliseconds (a positive integer) has passed since the previous flush and reading more input would block, all the pending input data is flushed from the encoder and made available in the output stream.
This can be useful if .B xz is used to compress data that is streamed over a network. Small .I timeout values make the data available at the receiving end with a small delay, but large .I timeout values give better compression ratio. .IP "" This feature is disabled by default. If this option is specified more than once, the last one takes effect. The special .I timeout value of .B 0 can be used to explicitly disable this feature. .IP "" This feature is not available on non-POSIX systems. .IP "" .\" FIXME .B "This feature is still experimental." Currently .B xz is unsuitable for decompressing the stream in real time due to how .B xz does buffering. .TP .BI \-\-memlimit\-compress= limit Set a memory usage limit for compression. If this option is specified multiple times, the last one takes effect. .IP "" If the compression settings exceed the .IR limit , .B xz will attempt to adjust the settings downwards so that the limit is no longer exceeded and display a notice that automatic adjustment was done. The adjustments are done in this order: reducing the number of threads, switching to single-threaded mode if even one thread in multi-threaded mode exceeds the .IR limit , and finally reducing the LZMA2 dictionary size. .IP "" When compressing with .B \-\-format=raw or if .B \-\-no\-adjust has been specified, only the number of threads may be reduced since it can be done without affecting the compressed output. .IP "" If the .I limit cannot be met even with the adjustments described above, an error is displayed and .B xz will exit with exit status 1. .IP "" The .I limit can be specified in multiple ways: .RS .IP \(bu 3 The .I limit can be an absolute value in bytes. Using an integer suffix like .B MiB can be useful. Example: .B "\-\-memlimit\-compress=80MiB" .IP \(bu 3 The .I limit can be specified as a percentage of total physical memory (RAM). 
This can be useful especially when setting the .B XZ_DEFAULTS environment variable in a shell initialization script that is shared between different computers. That way the limit is automatically bigger on systems with more memory. Example: .B "\-\-memlimit\-compress=70%" .IP \(bu 3 The .I limit can be reset back to its default value by setting it to .BR 0 . This is currently equivalent to setting the .I limit to .B max (no memory usage limit). .RE .IP "" For 32-bit .B xz there is a special case: if the .I limit would be over .BR "4020\ MiB" , the .I limit is set to .BR "4020\ MiB" . On MIPS32 .B "2000\ MiB" is used instead. (The values .B 0 and .B max aren't affected by this. A similar feature doesn't exist for decompression.) This can be helpful when a 32-bit executable has access to 4\ GiB address space (2 GiB on MIPS32) while hopefully doing no harm in other situations. .IP "" See also the section .BR "Memory usage" . .TP .BI \-\-memlimit\-decompress= limit Set a memory usage limit for decompression. This also affects the .B \-\-list mode. If the operation is not possible without exceeding the .IR limit , .B xz will display an error and decompressing the file will fail. See .BI \-\-memlimit\-compress= limit for possible ways to specify the .IR limit . .TP .BI \-\-memlimit\-mt\-decompress= limit Set a memory usage limit for multi-threaded decompression. This can only affect the number of threads; this will never make .B xz refuse to decompress a file. If .I limit is too low to allow any multi-threading, the .I limit is ignored and .B xz will continue in single-threaded mode. Note that if also .B \-\-memlimit\-decompress is used, it will always apply to both single-threaded and multi-threaded modes, and so the effective .I limit for multi-threading will never be higher than the limit set with .BR \-\-memlimit\-decompress . .IP "" In contrast to the other memory usage limit options, .BI \-\-memlimit\-mt\-decompress= limit has a system-specific default .IR limit . 
.B "xz \-\-info\-memory" can be used to see the current value. .IP "" This option and its default value exist because without any limit the threaded decompressor could end up allocating an insane amount of memory with some input files. If the default .I limit is too low on your system, feel free to increase the .I limit but never set it to a value larger than the amount of usable RAM as with appropriate input files .B xz will attempt to use that amount of memory even with a low number of threads. Running out of memory or swapping will not improve decompression performance. .IP "" See .BI \-\-memlimit\-compress= limit for possible ways to specify the .IR limit . Setting .I limit to .B 0 resets the .I limit to the default system-specific value. .IP "" .TP \fB\-M\fR \fIlimit\fR, \fB\-\-memlimit=\fIlimit\fR, \fB\-\-memory=\fIlimit This is equivalent to specifying .BI \-\-memlimit\-compress= limit .BI \-\-memlimit\-decompress= limit \fB\-\-memlimit\-mt\-decompress=\fIlimit\fR. .TP .B \-\-no\-adjust Display an error and exit if the memory usage limit cannot be met without adjusting settings that affect the compressed output. That is, this prevents .B xz from switching the encoder from multi-threaded mode to single-threaded mode and from reducing the LZMA2 dictionary size. Even when this option is used the number of threads may be reduced to meet the memory usage limit as that won't affect the compressed output. .IP "" Automatic adjusting is always disabled when creating raw streams .RB ( \-\-format=raw ). .TP \fB\-T\fR \fIthreads\fR, \fB\-\-threads=\fIthreads Specify the number of worker threads to use. Setting .I threads to a special value .B 0 makes .B xz use up to as many threads as the processor(s) on the system support. The actual number of threads can be fewer than .I threads if the input file is not big enough for threading with the given settings or if using more threads would exceed the memory usage limit.
.IP "" The single-threaded and multi-threaded compressors produce different output. Single-threaded compressor will give the smallest file size but only the output from the multi-threaded compressor can be decompressed using multiple threads. Setting .I threads to .B 1 will use the single-threaded mode. Setting .I threads to any other value, including .BR 0 , will use the multi-threaded compressor even if the system supports only one hardware thread. .RB ( xz 5.2.x used single-threaded mode in this situation.) .IP "" To use multi-threaded mode with only one thread, set .I threads to .BR +1 . The .B + prefix has no effect with values other than .BR 1 . A memory usage limit can still make .B xz switch to single-threaded mode unless .B \-\-no\-adjust is used. Support for the .B + prefix was added in .B xz 5.4.0. .IP "" If an automatic number of threads has been requested and no memory usage limit has been specified, then a system-specific default soft limit will be used to possibly limit the number of threads. It is a soft limit in the sense that it is ignored if the number of threads becomes one, thus a soft limit will never stop .B xz from compressing or decompressing. This default soft limit will not make .B xz switch from multi-threaded mode to single-threaded mode. The active limits can be seen with .BR "xz \-\-info\-memory" . .IP "" Currently the only threading method is to split the input into blocks and compress them independently from each other. The default block size depends on the compression level and can be overridden with the .BI \-\-block\-size= size option. .IP "" Threaded decompression only works on files that contain multiple blocks with size information in block headers. All large enough files compressed in multi-threaded mode meet this condition, but files compressed in single-threaded mode don't even if .BI \-\-block\-size= size has been used. .
.SS "Custom compressor filter chains" A custom filter chain allows specifying the compression settings in detail instead of relying on the settings associated to the presets. When a custom filter chain is specified, preset options .RB ( \-0 \&...\& .B \-9 and .BR \-\-extreme ) earlier on the command line are forgotten. If a preset option is specified after one or more custom filter chain options, the new preset takes effect and the custom filter chain options specified earlier are forgotten. .PP A filter chain is comparable to piping on the command line. When compressing, the uncompressed input goes to the first filter, whose output goes to the next filter (if any). The output of the last filter gets written to the compressed file. The maximum number of filters in the chain is four, but typically a filter chain has only one or two filters. .PP Many filters have limitations on where they can be in the filter chain: some filters can work only as the last filter in the chain, some only as a non-last filter, and some work in any position in the chain. Depending on the filter, this limitation is either inherent to the filter design or exists to prevent security issues. .PP A custom filter chain is specified by using one or more filter options in the order they are wanted in the filter chain. That is, the order of filter options is significant! When decoding raw streams .RB ( \-\-format=raw ), the filter chain is specified in the same order as it was specified when compressing. .PP Filters take filter-specific .I options as a comma-separated list. Extra commas in .I options are ignored. Every option has a default value, so you need to specify only those you want to change. .PP To see the whole filter chain and .IR options , use .B "xz \-vv" (that is, use .B \-\-verbose twice). This works also for viewing the filter chain options used by presets. 
.TP \fB\-\-lzma1\fR[\fB=\fIoptions\fR] .PD 0 .TP \fB\-\-lzma2\fR[\fB=\fIoptions\fR] .PD Add LZMA1 or LZMA2 filter to the filter chain. These filters can be used only as the last filter in the chain. .IP "" LZMA1 is a legacy filter, which is supported almost solely due to the legacy .B .lzma file format, which supports only LZMA1. LZMA2 is an updated version of LZMA1 to fix some practical issues of LZMA1. The .B .xz format uses LZMA2 and doesn't support LZMA1 at all. Compression speed and ratios of LZMA1 and LZMA2 are practically the same. .IP "" LZMA1 and LZMA2 share the same set of .IR options : .RS .TP .BI preset= preset Reset all LZMA1 or LZMA2 .I options to .IR preset . .I Preset consists of an integer, which may be followed by single-letter preset modifiers. The integer can be from .B 0 to .BR 9 , matching the command line options .B \-0 \&...\& .BR \-9 . The only supported modifier is currently .BR e , which matches .BR \-\-extreme . If no .B preset is specified, the default values of LZMA1 or LZMA2 .I options are taken from the preset .BR 6 . .TP .BI dict= size Dictionary (history buffer) .I size indicates how many bytes of the recently processed uncompressed data is kept in memory. The algorithm tries to find repeating byte sequences (matches) in the uncompressed data, and replace them with references to the data currently in the dictionary. The bigger the dictionary, the higher is the chance to find a match. Thus, increasing dictionary .I size usually improves compression ratio, but a dictionary bigger than the uncompressed file is a waste of memory. .IP "" Typical dictionary .I size is from 64\ KiB to 64\ MiB. The minimum is 4\ KiB. The maximum for compression is currently 1.5\ GiB (1536\ MiB). The decompressor already supports dictionaries up to one byte less than 4\ GiB, which is the maximum for the LZMA1 and LZMA2 stream formats. .IP "" Dictionary .I size and match finder .RI ( mf ) together determine the memory usage of the LZMA1 or LZMA2 encoder.
The same (or bigger) dictionary .I size is required for decompressing that was used when compressing, thus the memory usage of the decoder is determined by the dictionary size used when compressing. The .B .xz headers store the dictionary .I size either as .RI "2^" n or .RI "2^" n " + 2^(" n "\-1)," so these .I sizes are somewhat preferred for compression. Other .I sizes will get rounded up when stored in the .B .xz headers. .TP .BI lc= lc Specify the number of literal context bits. The minimum is 0 and the maximum is 4; the default is 3. In addition, the sum of .I lc and .I lp must not exceed 4. .IP "" All bytes that cannot be encoded as matches are encoded as literals. That is, literals are simply 8-bit bytes that are encoded one at a time. .IP "" The literal coding makes an assumption that the highest .I lc bits of the previous uncompressed byte correlate with the next byte. For example, in typical English text, an upper-case letter is often followed by a lower-case letter, and a lower-case letter is usually followed by another lower-case letter. In the US-ASCII character set, the highest three bits are 010 for upper-case letters and 011 for lower-case letters. When .I lc is at least 3, the literal coding can take advantage of this property in the uncompressed data. .IP "" The default value (3) is usually good. If you want maximum compression, test .BR lc=4 . Sometimes it helps a little, and sometimes it makes compression worse. If it makes it worse, test .B lc=2 too. .TP .BI lp= lp Specify the number of literal position bits. The minimum is 0 and the maximum is 4; the default is 0. .IP "" .I Lp affects what kind of alignment in the uncompressed data is assumed when encoding literals. See .I pb below for more information about alignment. .TP .BI pb= pb Specify the number of position bits. The minimum is 0 and the maximum is 4; the default is 2. .IP "" .I Pb affects what kind of alignment in the uncompressed data is assumed in general. 
The default means four-byte alignment .RI (2^ pb =2^2=4), which is often a good choice when there's no better guess. .IP "" When the alignment is known, setting .I pb accordingly may reduce the file size a little. For example, with text files having one-byte alignment (US-ASCII, ISO-8859-*, UTF-8), setting .B pb=0 can improve compression slightly. For UTF-16 text, .B pb=1 is a good choice. If the alignment is an odd number like 3 bytes, .B pb=0 might be the best choice. .IP "" Even though the assumed alignment can be adjusted with .I pb and .IR lp , LZMA1 and LZMA2 still slightly favor 16-byte alignment. It might be worth taking into account when designing file formats that are likely to be often compressed with LZMA1 or LZMA2. .TP .BI mf= mf Match finder has a major effect on encoder speed, memory usage, and compression ratio. Usually Hash Chain match finders are faster than Binary Tree match finders. The default depends on the .IR preset : 0 uses .BR hc3 , 1\(en3 use .BR hc4 , and the rest use .BR bt4 . .IP "" The following match finders are supported. The memory usage formulas below are rough approximations, which are closest to the reality when .I dict is a power of two. 
.RS .TP .B hc3 Hash Chain with 2- and 3-byte hashing .br Minimum value for .IR nice : 3 .br Memory usage: .br .I dict * 7.5 (if .I dict <= 16 MiB); .br .I dict * 5.5 + 64 MiB (if .I dict > 16 MiB) .TP .B hc4 Hash Chain with 2-, 3-, and 4-byte hashing .br Minimum value for .IR nice : 4 .br Memory usage: .br .I dict * 7.5 (if .I dict <= 32 MiB); .br .I dict * 6.5 (if .I dict > 32 MiB) .TP .B bt2 Binary Tree with 2-byte hashing .br Minimum value for .IR nice : 2 .br Memory usage: .I dict * 9.5 .TP .B bt3 Binary Tree with 2- and 3-byte hashing .br Minimum value for .IR nice : 3 .br Memory usage: .br .I dict * 11.5 (if .I dict <= 16 MiB); .br .I dict * 9.5 + 64 MiB (if .I dict > 16 MiB) .TP .B bt4 Binary Tree with 2-, 3-, and 4-byte hashing .br Minimum value for .IR nice : 4 .br Memory usage: .br .I dict * 11.5 (if .I dict <= 32 MiB); .br .I dict * 10.5 (if .I dict > 32 MiB) .RE .TP .BI mode= mode Compression .I mode specifies the method to analyze the data produced by the match finder. Supported .I modes are .B fast and .BR normal . The default is .B fast for .I presets 0\(en3 and .B normal for .I presets 4\(en9. .IP "" Usually .B fast is used with Hash Chain match finders and .B normal with Binary Tree match finders. This is also what the .I presets do. .TP .BI nice= nice Specify what is considered to be a nice length for a match. Once a match of at least .I nice bytes is found, the algorithm stops looking for possibly better matches. .IP "" .I Nice can be 2\(en273 bytes. Higher values tend to give better compression ratio at the expense of speed. The default depends on the .IR preset . .TP .BI depth= depth Specify the maximum search depth in the match finder. The default is the special value of 0, which makes the compressor determine a reasonable .I depth from .I mf and .IR nice . .IP "" Reasonable .I depth for Hash Chains is 4\(en100 and 16\(en1000 for Binary Trees. Using very high values for .I depth can make the encoder extremely slow with some files. 
Avoid setting the .I depth over 1000 unless you are prepared to interrupt the compression in case it is taking far too long. .RE .IP "" When decoding raw streams .RB ( \-\-format=raw ), LZMA2 needs only the dictionary .IR size . LZMA1 needs also .IR lc , .IR lp , and .IR pb . .TP \fB\-\-x86\fR[\fB=\fIoptions\fR] .PD 0 .TP \fB\-\-arm\fR[\fB=\fIoptions\fR] .TP \fB\-\-armthumb\fR[\fB=\fIoptions\fR] .TP \fB\-\-arm64\fR[\fB=\fIoptions\fR] .TP \fB\-\-powerpc\fR[\fB=\fIoptions\fR] .TP \fB\-\-ia64\fR[\fB=\fIoptions\fR] .TP \fB\-\-sparc\fR[\fB=\fIoptions\fR] .PD Add a branch/call/jump (BCJ) filter to the filter chain. These filters can be used only as a non-last filter in the filter chain. .IP "" A BCJ filter converts relative addresses in the machine code to their absolute counterparts. This doesn't change the size of the data but it increases redundancy, which can help LZMA2 to produce 0\(en15\ % smaller .B .xz file. The BCJ filters are always reversible, so using a BCJ filter for wrong type of data doesn't cause any data loss, although it may make the compression ratio slightly worse. The BCJ filters are very fast and use an insignificant amount of memory. .IP "" These BCJ filters have known problems related to the compression ratio: .RS .IP \(bu 3 Some types of files containing executable code (for example, object files, static libraries, and Linux kernel modules) have the addresses in the instructions filled with filler values. These BCJ filters will still do the address conversion, which will make the compression worse with these files. .IP \(bu 3 If a BCJ filter is applied on an archive, it is possible that it makes the compression ratio worse than not using a BCJ filter. For example, if there are similar or even identical executables then filtering will likely make the files less similar and thus compression is worse. The contents of non-executable files in the same archive can matter too. 
In practice one has to try with and without a BCJ filter to see which is better in each situation. .RE .IP "" Different instruction sets have different alignment: the executable file must be aligned to a multiple of this value in the input data to make the filter work. .RS .RS .PP .TS tab(;); l n l l n l. Filter;Alignment;Notes x86;1;32-bit or 64-bit x86 ARM;4; ARM-Thumb;2; ARM64;4;4096-byte alignment is best PowerPC;4;Big endian only IA-64;16;Itanium SPARC;4; .TE .RE .RE .IP "" Since the BCJ-filtered data is usually compressed with LZMA2, the compression ratio may be improved slightly if the LZMA2 options are set to match the alignment of the selected BCJ filter. For example, with the IA-64 filter, it's good to set .B pb=4 or even .B pb=4,lp=4,lc=0 with LZMA2 (2^4=16). The x86 filter is an exception; it's usually good to stick to LZMA2's default four-byte alignment when compressing x86 executables. .IP "" All BCJ filters support the same .IR options : .RS .TP .BI start= offset Specify the start .I offset that is used when converting between relative and absolute addresses. The .I offset must be a multiple of the alignment of the filter (see the table above). The default is zero. In practice, the default is good; specifying a custom .I offset is almost never useful. .RE .TP \fB\-\-delta\fR[\fB=\fIoptions\fR] Add the Delta filter to the filter chain. The Delta filter can be only used as a non-last filter in the filter chain. .IP "" Currently only simple byte-wise delta calculation is supported. It can be useful when compressing, for example, uncompressed bitmap images or uncompressed PCM audio. However, special purpose algorithms may give significantly better results than Delta + LZMA2. This is true especially with audio, which compresses faster and better, for example, with .BR flac (1). .IP "" Supported .IR options : .RS .TP .BI dist= distance Specify the .I distance of the delta calculation in bytes. .I distance must be 1\(en256. The default is 1. 
.IP "" For example, with .B dist=2 and eight-byte input A1 B1 A2 B3 A3 B5 A4 B7, the output will be A1 B1 01 02 01 02 01 02. .RE . .SS "Other options" .TP .BR \-q ", " \-\-quiet Suppress warnings and notices. Specify this twice to suppress errors too. This option has no effect on the exit status. That is, even if a warning was suppressed, the exit status to indicate a warning is still used. .TP .BR \-v ", " \-\-verbose Be verbose. If standard error is connected to a terminal, .B xz will display a progress indicator. Specifying .B \-\-verbose twice will give even more verbose output. .IP "" The progress indicator shows the following information: .RS .IP \(bu 3 Completion percentage is shown if the size of the input file is known. That is, the percentage cannot be shown in pipes. .IP \(bu 3 Amount of compressed data produced (compressing) or consumed (decompressing). .IP \(bu 3 Amount of uncompressed data consumed (compressing) or produced (decompressing). .IP \(bu 3 Compression ratio, which is calculated by dividing the amount of compressed data processed so far by the amount of uncompressed data processed so far. .IP \(bu 3 Compression or decompression speed. This is measured as the amount of uncompressed data consumed (compression) or produced (decompression) per second. It is shown after a few seconds have passed since .B xz started processing the file. .IP \(bu 3 Elapsed time in the format M:SS or H:MM:SS. .IP \(bu 3 Estimated remaining time is shown only when the size of the input file is known and a couple of seconds have already passed since .B xz started processing the file. The time is shown in a less precise format which never has any colons, for example, 2 min 30 s. .RE .IP "" When standard error is not a terminal, .B \-\-verbose will make .B xz print the filename, compressed size, uncompressed size, compression ratio, and possibly also the speed and elapsed time on a single line to standard error after compressing or decompressing the file. 
The speed and elapsed time are included only when the operation took at least a few seconds. If the operation didn't finish, for example, due to user interruption, also the completion percentage is printed if the size of the input file is known. .TP .BR \-Q ", " \-\-no\-warn Don't set the exit status to 2 even if a condition worth a warning was detected. This option doesn't affect the verbosity level, thus both .B \-\-quiet and .B \-\-no\-warn have to be used to not display warnings and to not alter the exit status. .TP .B \-\-robot Print messages in a machine-parsable format. This is intended to ease writing frontends that want to use .B xz instead of liblzma, which may be the case with various scripts. The output with this option enabled is meant to be stable across .B xz releases. See the section .B "ROBOT MODE" for details. .TP .B \-\-info\-memory Display, in human-readable format, how much physical memory (RAM) and how many processor threads .B xz thinks the system has and the memory usage limits for compression and decompression, and exit successfully. .TP .BR \-h ", " \-\-help Display a help message describing the most commonly used options, and exit successfully. .TP .BR \-H ", " \-\-long\-help Display a help message describing all features of .BR xz , and exit successfully. .TP .BR \-V ", " \-\-version Display the version number of .B xz and liblzma in human-readable format. To get machine-parsable output, specify .B \-\-robot before .BR \-\-version . . .SH "ROBOT MODE" The robot mode is activated with the .B \-\-robot option. It makes the output of .B xz easier to parse by other programs. Currently .B \-\-robot is supported only together with .BR \-\-version , .BR \-\-info\-memory , and .BR \-\-list . It will be supported for compression and decompression in the future. .
.SS Version .B "xz \-\-robot \-\-version" -will print the version number of +prints the version number of .B xz and liblzma in the following format: .PP .BI XZ_VERSION= XYYYZZZS .br .BI LIBLZMA_VERSION= XYYYZZZS .TP .I X Major version. .TP .I YYY Minor version. Even numbers are stable. Odd numbers are alpha or beta versions. .TP .I ZZZ Patch level for stable releases or just a counter for development releases. .TP .I S Stability. 0 is alpha, 1 is beta, and 2 is stable. .I S should be always 2 when .I YYY is even. .PP .I XYYYZZZS are the same on both lines if .B xz and liblzma are from the same XZ Utils release. .PP Examples: 4.999.9beta is .B 49990091 and 5.0.0 is .BR 50000002 . . .SS "Memory limit information" .B "xz \-\-robot \-\-info\-memory" -prints a single line with three tab-separated columns: +prints a single line with multiple tab-separated columns: .IP 1. 4 Total amount of physical memory (RAM) in bytes. .IP 2. 4 Memory usage limit for compression in bytes .RB ( \-\-memlimit\-compress ). A special value of .B 0 indicates the default setting which for single-threaded mode is the same as no limit. .IP 3. 4 Memory usage limit for decompression in bytes .RB ( \-\-memlimit\-decompress ). A special value of .B 0 indicates the default setting which for single-threaded mode is the same as no limit. .IP 4. 4 Since .B xz 5.3.4alpha: Memory usage for multi-threaded decompression in bytes .RB ( \-\-memlimit\-mt\-decompress ). This is never zero because a system-specific default value shown in the column 5 is used if no limit has been specified explicitly. This is also never greater than the value in the column 3 even if a larger value has been specified with .BR \-\-memlimit\-mt\-decompress . .IP 5. 4 Since .B xz 5.3.4alpha: A system-specific default memory usage limit that is used to limit the number of threads when compressing with an automatic number of threads .RB ( \-\-threads=0 ) and no memory usage limit has been specified .RB ( \-\-memlimit\-compress ). 
This is also used as the default value for .BR \-\-memlimit\-mt\-decompress . .IP 6. 4 Since .B xz 5.3.4alpha: Number of available processor threads. .PP In the future, the output of .B "xz \-\-robot \-\-info\-memory" may have more columns, but never more than a single line. . .SS "List mode" .B "xz \-\-robot \-\-list" uses tab-separated output. The first column of every line has a string that indicates the type of the information found on that line: .TP .B name This is always the first line when starting to list a file. The second column on the line is the filename. .TP .B file This line contains overall information about the .B .xz file. This line is always printed after the .B name line. .TP .B stream This line type is used only when .B \-\-verbose was specified. There are as many .B stream lines as there are streams in the .B .xz file. .TP .B block This line type is used only when .B \-\-verbose was specified. There are as many .B block lines as there are blocks in the .B .xz file. The .B block lines are shown after all the .B stream lines; different line types are not interleaved. .TP .B summary This line type is used only when .B \-\-verbose was specified twice. This line is printed after all .B block lines. Like the .B file line, the .B summary line contains overall information about the .B .xz file. .TP .B totals This line is always the very last line of the list output. It shows the total counts and sizes. .PP The columns of the .B file lines: .PD 0 .RS .IP 2. 4 Number of streams in the file .IP 3. 4 Total number of blocks in the stream(s) .IP 4. 4 Compressed size of the file .IP 5. 4 Uncompressed size of the file .IP 6. 4 Compression ratio, for example, .BR 0.123 . If ratio is over 9.999, three dashes .RB ( \-\-\- ) are displayed instead of the ratio. .IP 7. 4 Comma-separated list of integrity check names. The following strings are used for the known check types: .BR None , .BR CRC32 , .BR CRC64 , and .BR SHA\-256 . 
For unknown check types, .BI Unknown\- N is used, where .I N is the Check ID as a decimal number (one or two digits). .IP 8. 4 Total size of stream padding in the file .RE .PD .PP The columns of the .B stream lines: .PD 0 .RS .IP 2. 4 Stream number (the first stream is 1) .IP 3. 4 Number of blocks in the stream .IP 4. 4 Compressed start offset .IP 5. 4 Uncompressed start offset .IP 6. 4 Compressed size (does not include stream padding) .IP 7. 4 Uncompressed size .IP 8. 4 Compression ratio .IP 9. 4 Name of the integrity check .IP 10. 4 Size of stream padding .RE .PD .PP The columns of the .B block lines: .PD 0 .RS .IP 2. 4 Number of the stream containing this block .IP 3. 4 Block number relative to the beginning of the stream (the first block is 1) .IP 4. 4 Block number relative to the beginning of the file .IP 5. 4 Compressed start offset relative to the beginning of the file .IP 6. 4 Uncompressed start offset relative to the beginning of the file .IP 7. 4 Total compressed size of the block (includes headers) .IP 8. 4 Uncompressed size .IP 9. 4 Compression ratio .IP 10. 4 Name of the integrity check .RE .PD .PP If .B \-\-verbose was specified twice, additional columns are included on the .B block lines. These are not displayed with a single .BR \-\-verbose , because getting this information requires many seeks and can thus be slow: .PD 0 .RS .IP 11. 4 Value of the integrity check in hexadecimal .IP 12. 4 Block header size .IP 13. 4 Block flags: .B c indicates that compressed size is present, and .B u indicates that uncompressed size is present. If the flag is not set, a dash .RB ( \- ) is shown instead to keep the string length fixed. New flags may be added to the end of the string in the future. .IP 14. 4 Size of the actual compressed data in the block (this excludes the block header, block padding, and check fields) .IP 15. 4 Amount of memory (in bytes) required to decompress this block with this .B xz version .IP 16. 4 Filter chain. 
Note that most of the options used at compression time cannot be known, because only the options that are needed for decompression are stored in the .B .xz headers. .RE .PD .PP The columns of the .B summary lines: .PD 0 .RS .IP 2. 4 Amount of memory (in bytes) required to decompress this file with this .B xz version .IP 3. 4 .B yes or .B no indicating if all block headers have both compressed size and uncompressed size stored in them .PP .I Since .B xz .I 5.1.2alpha: .IP 4. 4 Minimum .B xz version required to decompress the file .RE .PD .PP The columns of the .B totals line: .PD 0 .RS .IP 2. 4 Number of streams .IP 3. 4 Number of blocks .IP 4. 4 Compressed size .IP 5. 4 Uncompressed size .IP 6. 4 Average compression ratio .IP 7. 4 Comma-separated list of integrity check names that were present in the files .IP 8. 4 Stream padding size .IP 9. 4 Number of files. This is here to keep the order of the earlier columns the same as on .B file lines. .PD .RE .PP If .B \-\-verbose was specified twice, additional columns are included on the .B totals line: .PD 0 .RS .IP 10. 4 Maximum amount of memory (in bytes) required to decompress the files with this .B xz version .IP 11. 4 .B yes or .B no indicating if all block headers have both compressed size and uncompressed size stored in them .PP .I Since .B xz .I 5.1.2alpha: .IP 12. 4 Minimum .B xz version required to decompress the file .RE .PD .PP Future versions may add new line types and new columns can be added to the existing line types, but the existing columns won't be changed. . .SH "EXIT STATUS" .TP .B 0 All is good. .TP .B 1 An error occurred. .TP .B 2 Something worth a warning occurred, but no actual errors occurred. .PP Notices (not warnings or errors) printed on standard error don't affect the exit status. . .SH ENVIRONMENT .B xz parses space-separated lists of options from the environment variables .B XZ_DEFAULTS and .BR XZ_OPT , in this order, before parsing the options from the command line. 
Note that only options are parsed from the environment variables; all non-options are silently ignored. Parsing is done with .BR getopt_long (3) which is used also for the command line arguments. .TP .B XZ_DEFAULTS User-specific or system-wide default options. Typically this is set in a shell initialization script to enable .BR xz 's memory usage limiter by default. Excluding shell initialization scripts and similar special cases, scripts must never set or unset .BR XZ_DEFAULTS . .TP .B XZ_OPT This is for passing options to .B xz when it is not possible to set the options directly on the .B xz command line. This is the case when .B xz is run by a script or tool, for example, GNU .BR tar (1): .RS .RS .PP .nf .ft CW XZ_OPT=\-2v tar caf foo.tar.xz foo .ft R .fi .RE .RE .IP "" Scripts may use .BR XZ_OPT , for example, to set script-specific default compression options. It is still recommended to allow users to override .B XZ_OPT if that is reasonable. For example, in .BR sh (1) scripts one may use something like this: .RS .RS .PP .nf .ft CW XZ_OPT=${XZ_OPT\-"\-7e"} export XZ_OPT .ft R .fi .RE .RE . .SH "LZMA UTILS COMPATIBILITY" The command line syntax of .B xz is practically a superset of .BR lzma , .BR unlzma , and .B lzcat as found from LZMA Utils 4.32.x. In most cases, it is possible to replace LZMA Utils with XZ Utils without breaking existing scripts. There are some incompatibilities though, which may sometimes cause problems. . .SS "Compression preset levels" The numbering of the compression level presets is not identical in .B xz and LZMA Utils. The most important difference is how dictionary sizes are mapped to different presets. Dictionary size is roughly equal to the decompressor memory usage. .RS .PP .TS tab(;); c c c c n n. 
Level;xz;LZMA Utils \-0;256 KiB;N/A \-1;1 MiB;64 KiB \-2;2 MiB;1 MiB \-3;4 MiB;512 KiB \-4;4 MiB;1 MiB \-5;8 MiB;2 MiB \-6;8 MiB;4 MiB \-7;16 MiB;8 MiB \-8;32 MiB;16 MiB \-9;64 MiB;32 MiB .TE .RE .PP The dictionary size differences affect the compressor memory usage too, but there are some other differences between LZMA Utils and XZ Utils, which make the difference even bigger: .RS .PP .TS tab(;); c c c c n n. Level;xz;LZMA Utils 4.32.x \-0;3 MiB;N/A \-1;9 MiB;2 MiB \-2;17 MiB;12 MiB \-3;32 MiB;12 MiB \-4;48 MiB;16 MiB \-5;94 MiB;26 MiB \-6;94 MiB;45 MiB \-7;186 MiB;83 MiB \-8;370 MiB;159 MiB \-9;674 MiB;311 MiB .TE .RE .PP The default preset level in LZMA Utils is .B \-7 while in XZ Utils it is .BR \-6 , so both use an 8 MiB dictionary by default. . .SS "Streamed vs. non-streamed .lzma files" The uncompressed size of the file can be stored in the .B .lzma header. LZMA Utils does that when compressing regular files. The alternative is to mark that uncompressed size is unknown and use end-of-payload marker to indicate where the decompressor should stop. LZMA Utils uses this method when uncompressed size isn't known, which is the case, for example, in pipes. .PP .B xz supports decompressing .B .lzma files with or without end-of-payload marker, but all .B .lzma files created by .B xz will use end-of-payload marker and have uncompressed size marked as unknown in the .B .lzma header. This may be a problem in some uncommon situations. For example, a .B .lzma decompressor in an embedded device might work only with files that have known uncompressed size. If you hit this problem, you need to use LZMA Utils or LZMA SDK to create .B .lzma files with known uncompressed size. . .SS "Unsupported .lzma files" The .B .lzma format allows .I lc values up to 8, and .I lp values up to 4. LZMA Utils can decompress files with any .I lc and .IR lp , but always creates files with .B lc=3 and .BR lp=0 . Creating files with other .I lc and .I lp is possible with .B xz and with LZMA SDK. 
.PP The implementation of the LZMA1 filter in liblzma requires that the sum of .I lc and .I lp must not exceed 4. Thus, .B .lzma files, which exceed this limitation, cannot be decompressed with .BR xz . .PP LZMA Utils creates only .B .lzma files which have a dictionary size of .RI "2^" n (a power of 2) but accepts files with any dictionary size. liblzma accepts only .B .lzma files which have a dictionary size of .RI "2^" n or .RI "2^" n " + 2^(" n "\-1)." This is to decrease false positives when detecting .B .lzma files. .PP These limitations shouldn't be a problem in practice, since practically all .B .lzma files have been compressed with settings that liblzma will accept. . .SS "Trailing garbage" When decompressing, LZMA Utils silently ignore everything after the first .B .lzma stream. In most situations, this is a bug. This also means that LZMA Utils don't support decompressing concatenated .B .lzma files. .PP If there is data left after the first .B .lzma stream, .B xz considers the file to be corrupt unless .B \-\-single\-stream was used. This may break obscure scripts which have assumed that trailing garbage is ignored. . .SH NOTES . .SS "Compressed output may vary" The exact compressed output produced from the same uncompressed input file may vary between XZ Utils versions even if compression options are identical. This is because the encoder can be improved (faster or better compression) without affecting the file format. The output can vary even between different builds of the same XZ Utils version, if different build options are used. .PP The above means that once .B \-\-rsyncable has been implemented, the resulting files won't necessarily be rsyncable unless both old and new files have been compressed with the same xz version. This problem can be fixed if a part of the encoder implementation is frozen to keep rsyncable output stable across xz versions. . 
.SS "Embedded .xz decompressors" Embedded .B .xz decompressor implementations like XZ Embedded don't necessarily support files created with integrity .I check types other than .B none and .BR crc32 . Since the default is .BR \-\-check=crc64 , you must use .B \-\-check=none or .B \-\-check=crc32 when creating files for embedded systems. .PP Outside embedded systems, all .B .xz format decompressors support all the .I check types, or at least are able to decompress the file without verifying the integrity check if the particular .I check is not supported. .PP XZ Embedded supports BCJ filters, but only with the default start offset. . .SH EXAMPLES . .SS Basics Compress the file .I foo into .I foo.xz using the default compression level .RB ( \-6 ), and remove .I foo if compression is successful: .RS .PP .nf .ft CW xz foo .ft R .fi .RE .PP Decompress .I bar.xz into .I bar and don't remove .I bar.xz even if decompression is successful: .RS .PP .nf .ft CW xz \-dk bar.xz .ft R .fi .RE .PP Create .I baz.tar.xz with the preset .B \-4e .RB ( "\-4 \-\-extreme" ), which is slower than the default .BR \-6 , but needs less memory for compression and decompression (48\ MiB and 5\ MiB, respectively): .RS .PP .nf .ft CW tar cf \- baz | xz \-4e > baz.tar.xz .ft R .fi .RE .PP A mix of compressed and uncompressed files can be decompressed to standard output with a single command: .RS .PP .nf .ft CW xz \-dcf a.txt b.txt.xz c.txt d.txt.lzma > abcd.txt .ft R .fi .RE . .SS "Parallel compression of many files" On GNU and *BSD, .BR find (1) and .BR xargs (1) can be used to parallelize compression of many files: .RS .PP .nf .ft CW find . \-type f \e! \-name '*.xz' \-print0 \e | xargs \-0r \-P4 \-n16 xz \-T1 .ft R .fi .RE .PP The .B \-P option to .BR xargs (1) sets the number of parallel .B xz processes. The best value for the .B \-n option depends on how many files there are to be compressed. 
If there are only a couple of files, the value should probably be 1; with tens of thousands of files, 100 or even more may be appropriate to reduce the number of .B xz processes that .BR xargs (1) will eventually create. .PP The option .B \-T1 for .B xz is there to force it to single-threaded mode, because .BR xargs (1) is used to control the amount of parallelization. . .SS "Robot mode" Calculate how many bytes have been saved in total after compressing multiple files: .RS .PP .nf .ft CW xz \-\-robot \-\-list *.xz | awk '/^totals/{print $5\-$4}' .ft R .fi .RE .PP A script may want to know that it is using new enough .BR xz . The following .BR sh (1) script checks that the version number of the .B xz tool is at least 5.0.0. This method is compatible with old beta versions, which didn't support the .B \-\-robot option: .RS .PP .nf .ft CW if ! eval "$(xz \-\-robot \-\-version 2> /dev/null)" || [ "$XZ_VERSION" \-lt 50000002 ]; then echo "Your xz is too old." fi unset XZ_VERSION LIBLZMA_VERSION .ft R .fi .RE .PP Set a memory usage limit for decompression using .BR XZ_OPT , but if a limit has already been set, don't increase it: .RS .PP .nf .ft CW NEWLIM=$((123 << 20))\ \ # 123 MiB OLDLIM=$(xz \-\-robot \-\-info\-memory | cut \-f3) if [ $OLDLIM \-eq 0 \-o $OLDLIM \-gt $NEWLIM ]; then XZ_OPT="$XZ_OPT \-\-memlimit\-decompress=$NEWLIM" export XZ_OPT fi .ft R .fi .RE . .SS "Custom compressor filter chains" The simplest use for custom filter chains is customizing a LZMA2 preset. This can be useful, because the presets cover only a subset of the potentially useful combinations of compression settings. .PP The CompCPU columns of the tables from the descriptions of the options .BR "\-0" " ... " "\-9" and .B \-\-extreme are useful when customizing LZMA2 presets. Here are the relevant parts collected from those two tables: .RS .PP .TS tab(;); c c n n. 
Preset;CompCPU \-0;0 \-1;1 \-2;2 \-3;3 \-4;4 \-5;5 \-6;6 \-5e;7 \-6e;8 .TE .RE .PP If you know that a file requires a somewhat big dictionary (for example, 32\ MiB) to compress well, but you want to compress it quicker than .B "xz \-8" would do, a preset with a low CompCPU value (for example, 1) can be modified to use a bigger dictionary: .RS .PP .nf .ft CW xz \-\-lzma2=preset=1,dict=32MiB foo.tar .ft R .fi .RE .PP With certain files, the above command may be faster than .B "xz \-6" while compressing significantly better. However, it must be emphasized that only some files benefit from a big dictionary while keeping the CompCPU value low. The most obvious situation, where a big dictionary can help a lot, is an archive containing very similar files of at least a few megabytes each. The dictionary size has to be significantly bigger than any individual file to allow LZMA2 to take full advantage of the similarities between consecutive files. .PP If very high compressor and decompressor memory usage is fine, and the file being compressed is at least several hundred megabytes, it may be useful to use an even bigger dictionary than the 64 MiB that .B "xz \-9" would use: .RS .PP .nf .ft CW xz \-vv \-\-lzma2=dict=192MiB big_foo.tar .ft R .fi .RE .PP Using .B \-vv .RB ( "\-\-verbose \-\-verbose" ) like in the above example can be useful to see the memory requirements of the compressor and decompressor. Remember that using a dictionary bigger than the size of the uncompressed file is a waste of memory, so the above command isn't useful for small files. .PP Sometimes the compression time doesn't matter, but the decompressor memory usage has to be kept low, for example, to make it possible to decompress the file on an embedded system. The following command uses .B \-6e .RB ( "\-6 \-\-extreme" ) as a base and sets the dictionary to only 64\ KiB. The resulting file can be decompressed with XZ Embedded (that's why there is .BR \-\-check=crc32 ) using about 100\ KiB of memory.
.RS .PP .nf .ft CW xz \-\-check=crc32 \-\-lzma2=preset=6e,dict=64KiB foo .ft R .fi .RE .PP If you want to squeeze out as many bytes as possible, adjusting the number of literal context bits .RI ( lc ) and the number of position bits .RI ( pb ) can sometimes help. Adjusting the number of literal position bits .RI ( lp ) might help too, but usually .I lc and .I pb are more important. For example, a source code archive contains mostly US-ASCII text, so something like the following might give a slightly (like 0.1\ %) smaller file than .B "xz \-6e" (try also without .BR lc=4 ): .RS .PP .nf .ft CW xz \-\-lzma2=preset=6e,pb=0,lc=4 source_code.tar .ft R .fi .RE .PP Using another filter together with LZMA2 can improve compression with certain file types. For example, to compress an x86-32 or x86-64 shared library using the x86 BCJ filter: .RS .PP .nf .ft CW xz \-\-x86 \-\-lzma2 libfoo.so .ft R .fi .RE .PP Note that the order of the filter options is significant. If .B \-\-x86 is specified after .BR \-\-lzma2 , .B xz will give an error, because there cannot be any filter after LZMA2, and also because the x86 BCJ filter cannot be used as the last filter in the chain. .PP The Delta filter together with LZMA2 can give good results with bitmap images. It should usually beat PNG, which has a few more advanced filters than simple delta but uses Deflate for the actual compression. .PP The image has to be saved in uncompressed format, for example, as uncompressed TIFF. The distance parameter of the Delta filter is set to match the number of bytes per pixel in the image. For example, a 24-bit RGB bitmap needs .BR dist=3 , and it is also good to pass .B pb=0 to LZMA2 to accommodate the three-byte alignment: .RS .PP .nf .ft CW xz \-\-delta=dist=3 \-\-lzma2=pb=0 foo.tiff .ft R .fi .RE .PP If multiple images have been put into a single archive (for example, .BR .tar ), the Delta filter will work on that too as long as all images have the same number of bytes per pixel. .
.SH "SEE ALSO" .BR xzdec (1), .BR xzdiff (1), .BR xzgrep (1), .BR xzless (1), .BR xzmore (1), .BR gzip (1), .BR bzip2 (1), .BR 7z (1) .PP XZ Utils: <https://tukaani.org/xz/> .br XZ Embedded: <https://tukaani.org/xz/embedded.html> .br LZMA SDK: <https://7-zip.org/sdk.html> diff --git a/lib/liblzma/config.h b/lib/liblzma/config.h index e0ff56d43a56..c3fa947ec5fb 100644 --- a/lib/liblzma/config.h +++ b/lib/liblzma/config.h @@ -1,618 +1,618 @@ /* config.h. Generated from config.h.in by configure. */ /* config.h.in. Generated from configure.ac by autoheader. */ /* Define if building universal (internal helper macro) */ /* #undef AC_APPLE_UNIVERSAL_BUILD */ /* How many MiB of RAM to assume if the real amount cannot be determined. */ #define ASSUME_RAM 128 /* Define to 1 if translation of program messages to the user's native language is requested. */ /* FreeBSD - disabled intentionally */ /* #undef ENABLE_NLS */ /* Define to 1 if bswap_16 is available. */ /* #undef HAVE_BSWAP_16 */ /* Define to 1 if bswap_32 is available. */ /* #undef HAVE_BSWAP_32 */ /* Define to 1 if bswap_64 is available. */ /* #undef HAVE_BSWAP_64 */ /* Define to 1 if you have the <byteswap.h> header file. */ /* #undef HAVE_BYTESWAP_H */ #ifndef WITHOUT_CAPSICUM /* Define to 1 if Capsicum is available. */ #define HAVE_CAPSICUM 1 #endif /* Define to 1 if the system has the type `CC_SHA256_CTX'. */ /* #undef HAVE_CC_SHA256_CTX */ /* Define to 1 if you have the `CC_SHA256_Init' function. */ /* #undef HAVE_CC_SHA256_INIT */ /* Define to 1 if you have the Mac OS X function CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */ /* #undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES */ /* Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in the CoreFoundation framework. */ /* #undef HAVE_CFPREFERENCESCOPYAPPVALUE */ /* Define to 1 if crc32 integrity check is enabled. */ #define HAVE_CHECK_CRC32 1 /* Define to 1 if crc64 integrity check is enabled. */ #define HAVE_CHECK_CRC64 1 /* Define to 1 if sha256 integrity check is enabled.
*/ #define HAVE_CHECK_SHA256 1 /* Define to 1 if you have the `clock_gettime' function. */ #define HAVE_CLOCK_GETTIME 1 /* Define to 1 if `CLOCK_MONOTONIC' is declared in <time.h>. */ #define HAVE_CLOCK_MONOTONIC 1 /* Define to 1 if you have the <CommonCrypto/CommonDigest.h> header file. */ /* #undef HAVE_COMMONCRYPTO_COMMONDIGEST_H */ /* Define to 1 if you have the <cpuid.h> header file. */ #define HAVE_CPUID_H 1 /* Define if the GNU dcgettext() function is already present or preinstalled. */ /* FreeBSD - disabled intentionally */ /* #undef HAVE_DCGETTEXT */ /* Define to 1 if any of HAVE_DECODER_foo have been defined. */ #define HAVE_DECODERS 1 /* Define to 1 if arm decoder is enabled. */ #define HAVE_DECODER_ARM 1 /* Define to 1 if arm64 decoder is enabled. */ #define HAVE_DECODER_ARM64 1 /* Define to 1 if armthumb decoder is enabled. */ #define HAVE_DECODER_ARMTHUMB 1 /* Define to 1 if delta decoder is enabled. */ #define HAVE_DECODER_DELTA 1 /* Define to 1 if ia64 decoder is enabled. */ #define HAVE_DECODER_IA64 1 /* Define to 1 if lzma1 decoder is enabled. */ #define HAVE_DECODER_LZMA1 1 /* Define to 1 if lzma2 decoder is enabled. */ #define HAVE_DECODER_LZMA2 1 /* Define to 1 if powerpc decoder is enabled. */ #define HAVE_DECODER_POWERPC 1 /* Define to 1 if sparc decoder is enabled. */ #define HAVE_DECODER_SPARC 1 /* Define to 1 if x86 decoder is enabled. */ #define HAVE_DECODER_X86 1 /* Define to 1 if you have the <dlfcn.h> header file. */ #define HAVE_DLFCN_H 1 /* Define to 1 if any of HAVE_ENCODER_foo have been defined. */ #define HAVE_ENCODERS 1 /* Define to 1 if arm encoder is enabled. */ #define HAVE_ENCODER_ARM 1 /* Define to 1 if arm64 encoder is enabled. */ #define HAVE_ENCODER_ARM64 1 /* Define to 1 if armthumb encoder is enabled. */ #define HAVE_ENCODER_ARMTHUMB 1 /* Define to 1 if delta encoder is enabled. */ #define HAVE_ENCODER_DELTA 1 /* Define to 1 if ia64 encoder is enabled. */ #define HAVE_ENCODER_IA64 1 /* Define to 1 if lzma1 encoder is enabled.
*/ #define HAVE_ENCODER_LZMA1 1 /* Define to 1 if lzma2 encoder is enabled. */ #define HAVE_ENCODER_LZMA2 1 /* Define to 1 if powerpc encoder is enabled. */ #define HAVE_ENCODER_POWERPC 1 /* Define to 1 if sparc encoder is enabled. */ #define HAVE_ENCODER_SPARC 1 /* Define to 1 if x86 encoder is enabled. */ #define HAVE_ENCODER_X86 1 /* Define to 1 if you have the <fcntl.h> header file. */ #define HAVE_FCNTL_H 1 /* Define to 1 if __attribute__((__constructor__)) is supported for functions. */ #define HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR 1 /* Define to 1 if you have the `futimens' function. */ #define HAVE_FUTIMENS 1 /* Define to 1 if you have the `futimes' function. */ /* #undef HAVE_FUTIMES */ /* Define to 1 if you have the `futimesat' function. */ /* #undef HAVE_FUTIMESAT */ /* Define to 1 if you have the <getopt.h> header file. */ #define HAVE_GETOPT_H 1 /* Define to 1 if you have the `getopt_long' function. */ #define HAVE_GETOPT_LONG 1 /* Define if the GNU gettext() function is already present or preinstalled. */ /* FreeBSD - disabled intentionally */ /* #undef HAVE_GETTEXT */ /* Define if you have the iconv() function and it works. */ #define HAVE_ICONV 1 /* Define to 1 if you have the <immintrin.h> header file. */ /* FreeBSD - only with clang because the base gcc does not support it */ #if defined(__clang__) && defined(__FreeBSD__) && defined(__amd64__) #define HAVE_IMMINTRIN_H 1 #endif /* Define to 1 if you have the <inttypes.h> header file. */ #define HAVE_INTTYPES_H 1 /* Define to 1 if you have the <limits.h> header file. */ #define HAVE_LIMITS_H 1 /* Define to 1 if .lz (lzip) decompression support is enabled. */ #define HAVE_LZIP_DECODER 1 /* Define to 1 if mbrtowc and mbstate_t are properly declared. */ #define HAVE_MBRTOWC 1 /* Define to 1 to enable bt2 match finder. */ #define HAVE_MF_BT2 1 /* Define to 1 to enable bt3 match finder. */ #define HAVE_MF_BT3 1 /* Define to 1 to enable bt4 match finder. */ #define HAVE_MF_BT4 1 /* Define to 1 to enable hc3 match finder.
*/ #define HAVE_MF_HC3 1 /* Define to 1 to enable hc4 match finder. */ #define HAVE_MF_HC4 1 /* Define to 1 if you have the <minix/config.h> header file. */ /* #undef HAVE_MINIX_CONFIG_H */ /* Define to 1 if getopt.h declares extern int optreset. */ #define HAVE_OPTRESET 1 /* Define to 1 if you have the `pledge' function. */ /* #undef HAVE_PLEDGE */ /* Define to 1 if you have the `posix_fadvise' function. */ #define HAVE_POSIX_FADVISE 1 /* Define to 1 if `program_invocation_name' is declared in <errno.h>. */ /* #undef HAVE_PROGRAM_INVOCATION_NAME */ /* Define to 1 if you have the `pthread_condattr_setclock' function. */ #define HAVE_PTHREAD_CONDATTR_SETCLOCK 1 /* Have PTHREAD_PRIO_INHERIT. */ #define HAVE_PTHREAD_PRIO_INHERIT 1 /* Define to 1 if you have the `SHA256Init' function. */ /* #undef HAVE_SHA256INIT */ /* Define to 1 if the system has the type `SHA256_CTX'. */ #define HAVE_SHA256_CTX 1 /* Define to 1 if you have the <sha256.h> header file. */ #define HAVE_SHA256_H 1 /* Define to 1 if you have the `SHA256_Init' function. */ #define HAVE_SHA256_INIT 1 /* Define to 1 if the system has the type `SHA2_CTX'. */ /* #undef HAVE_SHA2_CTX */ /* Define to 1 if you have the <sha2.h> header file. */ /* #undef HAVE_SHA2_H */ /* Define to 1 if optimizing for size. */ /* #undef HAVE_SMALL */ /* Define to 1 if stdbool.h conforms to C99. */ #define HAVE_STDBOOL_H 1 /* Define to 1 if you have the <stdint.h> header file. */ #define HAVE_STDINT_H 1 /* Define to 1 if you have the <stdio.h> header file. */ #define HAVE_STDIO_H 1 /* Define to 1 if you have the <stdlib.h> header file. */ #define HAVE_STDLIB_H 1 /* Define to 1 if you have the <strings.h> header file. */ #define HAVE_STRINGS_H 1 /* Define to 1 if you have the <string.h> header file. */ #define HAVE_STRING_H 1 /* Define to 1 if `st_atimensec' is a member of `struct stat'. */ #define HAVE_STRUCT_STAT_ST_ATIMENSEC 1 /* Define to 1 if `st_atimespec.tv_nsec' is a member of `struct stat'. */ #define HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC 1 /* Define to 1 if `st_atim.st__tim.tv_nsec' is a member of `struct stat'.
*/ /* #undef HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC */ /* Define to 1 if `st_atim.tv_nsec' is a member of `struct stat'. */ #define HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC 1 /* Define to 1 if `st_uatime' is a member of `struct stat'. */ /* #undef HAVE_STRUCT_STAT_ST_UATIME */ /* Define to 1 if GNU/Linux-specific details are unconditionally wanted for symbol versioning. Define to 2 if these are wanted only if also PIC is defined (allows building both shared and static liblzma at the same time with Libtool if neither --with-pic nor --without-pic is used). This define must be used together with liblzma_linux.map. */ /* #undef HAVE_SYMBOL_VERSIONS_LINUX */ /* Define to 1 if you have the <sys/byteorder.h> header file. */ /* #undef HAVE_SYS_BYTEORDER_H */ #ifndef WITHOUT_CAPSICUM /* Define to 1 if you have the <sys/capsicum.h> header file. */ #define HAVE_SYS_CAPSICUM_H 1 #endif /* Define to 1 if you have the <sys/endian.h> header file. */ /* #undef HAVE_SYS_ENDIAN_H */ /* Define to 1 if you have the <sys/param.h> header file. */ #define HAVE_SYS_PARAM_H 1 /* Define to 1 if you have the <sys/stat.h> header file. */ #define HAVE_SYS_STAT_H 1 /* Define to 1 if you have the <sys/time.h> header file. */ #define HAVE_SYS_TIME_H 1 /* Define to 1 if you have the <sys/types.h> header file. */ #define HAVE_SYS_TYPES_H 1 /* Define to 1 if the system has the type `uintptr_t'. */ #define HAVE_UINTPTR_T 1 /* Define to 1 if you have the <unistd.h> header file. */ #define HAVE_UNISTD_H 1 /* Define to 1 if _mm_set_epi64x and _mm_clmulepi64_si128 are usable. See configure.ac for details. */ #if defined(__FreeBSD__) && defined(__amd64__) #define HAVE_USABLE_CLMUL 1 #endif /* Define to 1 if you have the `utime' function. */ /* #undef HAVE_UTIME */ /* Define to 1 if you have the `utimes' function. */ /* #undef HAVE_UTIMES */ /* Define to 1 or 0, depending whether the compiler supports simple visibility declarations. */ #define HAVE_VISIBILITY 1 /* Define to 1 if you have the <wchar.h> header file. */ #define HAVE_WCHAR_H 1 /* Define to 1 if you have the `wcwidth' function.
*/ #define HAVE_WCWIDTH 1 /* Define to 1 if the system has the type `_Bool'. */ #define HAVE__BOOL 1 /* Define to 1 if you have the `_futime' function. */ /* #undef HAVE__FUTIME */ /* Define to 1 if _mm_movemask_epi8 is available. */ #if defined(__FreeBSD__) && defined(__amd64__) #define HAVE__MM_MOVEMASK_EPI8 1 #endif /* Define to 1 if the GNU C extension __builtin_assume_aligned is supported. */ #define HAVE___BUILTIN_ASSUME_ALIGNED 1 /* Define to 1 if the GNU C extensions __builtin_bswap16/32/64 are supported. */ #define HAVE___BUILTIN_BSWAPXX 1 /* Define to the sub-directory where libtool stores uninstalled libraries. */ #define LT_OBJDIR ".libs/" /* Define to 1 when using POSIX threads (pthreads). */ #define MYTHREAD_POSIX 1 /* Define to 1 when using Windows Vista compatible threads. This uses features that are not available on Windows XP. */ /* #undef MYTHREAD_VISTA */ /* Define to 1 when using Windows 95 (and thus XP) compatible threads. This avoids use of features that were added in Windows Vista. */ /* #undef MYTHREAD_WIN95 */ /* Define to 1 to disable debugging code. */ #define NDEBUG 1 /* Name of package */ #define PACKAGE "xz" /* Define to the address where bug reports for this package should be sent. */ #define PACKAGE_BUGREPORT "xz@tukaani.org" /* Define to the full name of this package. */ #define PACKAGE_NAME "XZ Utils" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "XZ Utils 5.4.3" +#define PACKAGE_STRING "XZ Utils 5.4.4" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "xz" /* Define to the home page for this package. */ #define PACKAGE_URL "https://tukaani.org/xz/" /* Define to the version of this package. */ -#define PACKAGE_VERSION "5.4.3" +#define PACKAGE_VERSION "5.4.4" /* Define to necessary symbol if this constant uses a non-standard name on your system. */ /* #undef PTHREAD_CREATE_JOINABLE */ /* The size of `size_t', as computed by sizeof. 
*/ #define SIZEOF_SIZE_T 8 /* Define to 1 if all of the C90 standard headers exist (not just the ones required in a freestanding environment). This macro is provided for backward compatibility; new code need not use it. */ #define STDC_HEADERS 1 /* Define to 1 if the number of available CPU cores can be detected with cpuset(2). */ #define TUKLIB_CPUCORES_CPUSET 1 /* Define to 1 if the number of available CPU cores can be detected with pstat_getdynamic(). */ /* #undef TUKLIB_CPUCORES_PSTAT_GETDYNAMIC */ /* Define to 1 if the number of available CPU cores can be detected with sched_getaffinity() */ /* #undef TUKLIB_CPUCORES_SCHED_GETAFFINITY */ /* Define to 1 if the number of available CPU cores can be detected with sysconf(_SC_NPROCESSORS_ONLN) or sysconf(_SC_NPROC_ONLN). */ /* #undef TUKLIB_CPUCORES_SYSCONF */ /* Define to 1 if the number of available CPU cores can be detected with sysctl(). */ /* #undef TUKLIB_CPUCORES_SYSCTL */ /* Define to 1 if the system supports fast unaligned access to 16-bit, 32-bit, and 64-bit integers. */ /* FreeBSD - derive from __NO_STRICT_ALIGNMENT */ /* #undef TUKLIB_FAST_UNALIGNED_ACCESS */ /* Define to 1 if the amount of physical memory can be detected with _system_configuration.physmem. */ /* #undef TUKLIB_PHYSMEM_AIX */ /* Define to 1 if the amount of physical memory can be detected with getinvent_r(). */ /* #undef TUKLIB_PHYSMEM_GETINVENT_R */ /* Define to 1 if the amount of physical memory can be detected with getsysinfo(). */ /* #undef TUKLIB_PHYSMEM_GETSYSINFO */ /* Define to 1 if the amount of physical memory can be detected with pstat_getstatic(). */ /* #undef TUKLIB_PHYSMEM_PSTAT_GETSTATIC */ /* Define to 1 if the amount of physical memory can be detected with sysconf(_SC_PAGESIZE) and sysconf(_SC_PHYS_PAGES). */ #define TUKLIB_PHYSMEM_SYSCONF 1 /* Define to 1 if the amount of physical memory can be detected with sysctl(). 
*/ /* #undef TUKLIB_PHYSMEM_SYSCTL */ /* Define to 1 if the amount of physical memory can be detected with Linux sysinfo(). */ /* #undef TUKLIB_PHYSMEM_SYSINFO */ /* Define to 1 to use unsafe type punning, e.g. char *x = ...; *(int *)x = 123; which violates strict aliasing rules and thus is undefined behavior and might result in broken code. */ /* #undef TUKLIB_USE_UNSAFE_TYPE_PUNNING */ /* Enable extensions on AIX 3, Interix. */ #ifndef _ALL_SOURCE # define _ALL_SOURCE 1 #endif /* Enable general extensions on macOS. */ #ifndef _DARWIN_C_SOURCE # define _DARWIN_C_SOURCE 1 #endif /* Enable general extensions on Solaris. */ #ifndef __EXTENSIONS__ # define __EXTENSIONS__ 1 #endif /* Enable GNU extensions on systems that have them. */ #ifndef _GNU_SOURCE # define _GNU_SOURCE 1 #endif /* Enable X/Open compliant socket functions that do not require linking with -lxnet on HP-UX 11.11. */ #ifndef _HPUX_ALT_XOPEN_SOCKET_API # define _HPUX_ALT_XOPEN_SOCKET_API 1 #endif /* Identify the host operating system as Minix. This macro does not affect the system headers' behavior. A future release of Autoconf may stop defining this macro. */ #ifndef _MINIX /* # undef _MINIX */ #endif /* Enable general extensions on NetBSD. Enable NetBSD compatibility extensions on Minix. */ #ifndef _NETBSD_SOURCE # define _NETBSD_SOURCE 1 #endif /* Enable OpenBSD compatibility extensions on NetBSD. Oddly enough, this does nothing on OpenBSD. */ #ifndef _OPENBSD_SOURCE # define _OPENBSD_SOURCE 1 #endif /* Define to 1 if needed for POSIX-compatible behavior. */ #ifndef _POSIX_SOURCE /* # undef _POSIX_SOURCE */ #endif /* Define to 2 if needed for POSIX-compatible behavior. */ #ifndef _POSIX_1_SOURCE /* # undef _POSIX_1_SOURCE */ #endif /* Enable POSIX-compatible threading on Solaris. */ #ifndef _POSIX_PTHREAD_SEMANTICS # define _POSIX_PTHREAD_SEMANTICS 1 #endif /* Enable extensions specified by ISO/IEC TS 18661-5:2014. 
*/ #ifndef __STDC_WANT_IEC_60559_ATTRIBS_EXT__ # define __STDC_WANT_IEC_60559_ATTRIBS_EXT__ 1 #endif /* Enable extensions specified by ISO/IEC TS 18661-1:2014. */ #ifndef __STDC_WANT_IEC_60559_BFP_EXT__ # define __STDC_WANT_IEC_60559_BFP_EXT__ 1 #endif /* Enable extensions specified by ISO/IEC TS 18661-2:2015. */ #ifndef __STDC_WANT_IEC_60559_DFP_EXT__ # define __STDC_WANT_IEC_60559_DFP_EXT__ 1 #endif /* Enable extensions specified by ISO/IEC TS 18661-4:2015. */ #ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__ # define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1 #endif /* Enable extensions specified by ISO/IEC TS 18661-3:2015. */ #ifndef __STDC_WANT_IEC_60559_TYPES_EXT__ # define __STDC_WANT_IEC_60559_TYPES_EXT__ 1 #endif /* Enable extensions specified by ISO/IEC TR 24731-2:2010. */ #ifndef __STDC_WANT_LIB_EXT2__ # define __STDC_WANT_LIB_EXT2__ 1 #endif /* Enable extensions specified by ISO/IEC 24747:2009. */ #ifndef __STDC_WANT_MATH_SPEC_FUNCS__ # define __STDC_WANT_MATH_SPEC_FUNCS__ 1 #endif /* Enable extensions on HP NonStop. */ #ifndef _TANDEM_SOURCE # define _TANDEM_SOURCE 1 #endif /* Enable X/Open extensions. Define to 500 only if necessary to make mbstate_t available. */ #ifndef _XOPEN_SOURCE /* # undef _XOPEN_SOURCE */ #endif /* Version number of package */ -#define VERSION "5.4.3" +#define VERSION "5.4.4" /* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most significant byte first (like Motorola and SPARC, unlike Intel). */ #if defined(__FreeBSD__) #include <sys/_types.h> #if defined(__NO_STRICT_ALIGNMENT) #define TUKLIB_FAST_UNALIGNED_ACCESS 1 #endif #include <sys/endian.h> #if _BYTE_ORDER == _BIG_ENDIAN # define WORDS_BIGENDIAN 1 #endif #endif /* Number of bits in a file offset, on hosts where this is settable. */ /* #undef _FILE_OFFSET_BITS */ /* Define for large files, on AIX-style hosts. */ /* #undef _LARGE_FILES */ /* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>, <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the #define below would cause a syntax error.
*/ /* #undef _UINT32_T */ /* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>, <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the #define below would cause a syntax error. */ /* #undef _UINT64_T */ /* Define for Solaris 2.5.1 so the uint8_t typedef from <sys/synch.h>, <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the #define below would cause a syntax error. */ /* #undef _UINT8_T */ /* Define to rpl_ if the getopt replacement functions and variables should be used. */ /* #undef __GETOPT_PREFIX */ /* Define to the type of a signed integer type of width exactly 32 bits if such a type exists and the standard includes do not define it. */ /* #undef int32_t */ /* Define to the type of a signed integer type of width exactly 64 bits if such a type exists and the standard includes do not define it. */ /* #undef int64_t */ /* Define to the type of an unsigned integer type of width exactly 16 bits if such a type exists and the standard includes do not define it. */ /* #undef uint16_t */ /* Define to the type of an unsigned integer type of width exactly 32 bits if such a type exists and the standard includes do not define it. */ /* #undef uint32_t */ /* Define to the type of an unsigned integer type of width exactly 64 bits if such a type exists and the standard includes do not define it. */ /* #undef uint64_t */ /* Define to the type of an unsigned integer type of width exactly 8 bits if such a type exists and the standard includes do not define it. */ /* #undef uint8_t */ /* Define to the type of an unsigned integer type wide enough to hold a pointer, if such a type exists, and if the system does not define it. */ /* #undef uintptr_t */