diff --git a/contrib/xz/AUTHORS b/contrib/xz/AUTHORS index bda87975a51d..0186555975f4 100644 --- a/contrib/xz/AUTHORS +++ b/contrib/xz/AUTHORS @@ -1,27 +1,27 @@ Authors of XZ Utils =================== XZ Utils is developed and maintained by Lasse Collin - . + and Jia Tan . Major parts of liblzma are based on code written by Igor Pavlov, specifically the LZMA SDK . Without this code, XZ Utils wouldn't exist. The SHA-256 implementation in liblzma is based on the code found from 7-Zip , which has a modified version of the SHA-256 code found from Crypto++ . The SHA-256 code in Crypto++ was written by Kevin Springle and Wei Dai. Some scripts have been adapted from gzip. The original versions were written by Jean-loup Gailly, Charles Levert, and Paul Eggert. Andrew Dudman helped adapting the scripts and their man pages for XZ Utils. The GNU Autotools-based build system contains files from many authors, which I'm not trying to list here. Several people have contributed fixes or reported bugs. Most of them are mentioned in the file THANKS. diff --git a/contrib/xz/ChangeLog b/contrib/xz/ChangeLog index 37c0d95c4238..de9c32fb2187 100644 --- a/contrib/xz/ChangeLog +++ b/contrib/xz/ChangeLog @@ -1,6947 +1,9067 @@ +commit 5476089d9c42b9b04e92b80e1800b384a98265cb +Author: Lasse Collin +Date: 2022-11-13 19:58:47 +0200 + + Bump version and soname for 5.2.8. + + src/liblzma/Makefile.am | 2 +- + src/liblzma/api/lzma/version.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +commit f9994f395d04d37b75e879f90397258b86b466cc +Author: Lasse Collin +Date: 2022-11-13 19:57:26 +0200 + + Add NEWS for 5.2.8. + + NEWS | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 79 insertions(+) + +commit cdf14b28993ef2f3168821372467e3aa7cf1e973 +Author: Lasse Collin +Date: 2022-11-11 17:16:03 +0200 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit 454f567e58bbadea5cff4f31ef450c1305e9853d +Author: Lasse Collin +Date: 2022-11-11 17:15:25 +0200 + + liblzma: Fix building with Intel ICC (the classic compiler). + + It claims __GNUC__ >= 10 but doesn't support __symver__ attribute. + + Thanks to Stephen Sachs. + + src/liblzma/common/common.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 2f01169f5a81e21e7a7e5f799c32472c6277b1d5 +Author: Lasse Collin +Date: 2022-11-11 14:35:58 +0200 + + liblzma: Fix incorrect #ifdef for x86 SSE2 support. + + __SSE2__ is the correct macro for SSE2 support with GCC, Clang, + and ICC. __SSE2_MATH__ means doing floating point math with SSE2 + instead of 387. Often the latter macro is defined if the first + one is but it was still a bug. + + src/liblzma/common/memcmplen.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +commit fc1358679ed3994f67e5e8c869c281be82370a32 +Author: Lasse Collin +Date: 2022-11-11 12:23:58 +0200 + + Scripts: Ignore warnings from xz. + + In practice this means making the scripts work when + the input files have an unsupported check type which + isn't a problem in practice unless support for + some check types has been disabled at build time. + + src/scripts/xzdiff.in | 5 +++-- + src/scripts/xzgrep.in | 2 +- + src/scripts/xzless.in | 4 ++-- + src/scripts/xzmore.in | 4 ++-- + 4 files changed, 8 insertions(+), 7 deletions(-) + +commit a08be1c420501b1c75770a8f6103e3959486c895 +Author: Lasse Collin +Date: 2022-11-09 14:10:52 +0200 + + xz: Add comments about stdin and src_st.st_size. + + "xz -v < regular_file > out.xz" doesn't display the percentage + and estimated remaining time because it doesn't even try to + check the input file size when input is read from stdin. + This could be improved but for now there's just a comment + to remind about it. + + src/xz/coder.c | 9 +++++++++ + src/xz/file_io.c | 4 ++++ + 2 files changed, 13 insertions(+) + +commit 3ee411cd1cd636bdced6ecede4651394bb4f9cb1 +Author: Lasse Collin +Date: 2022-11-09 12:48:22 +0200 + + xz: Fix displaying of file sizes in progress indicator in passthru mode. + + It worked for one input file since the counters are zero when + xz starts but they weren't reset when starting a new file in + passthru mode. For example, if files A, B, and C are one byte each, + then "xz -dcvf A B C" would show file sizes as 1, 2, and 3 bytes + instead of 1, 1, and 1 byte. + + src/xz/coder.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +commit aa7fa9d960df4bb21f7727bbf7ce0503f3d211b4 +Author: Lasse Collin +Date: 2022-11-09 11:27:20 +0200 + + xz: Add a comment why --to-stdout is not in --help. + + It is on the man page still. + + src/xz/message.c | 3 +++ + 1 file changed, 3 insertions(+) + +commit ff49ff84a4ba3fab25ffb6d65af321dee36c1273 +Author: Lasse Collin +Date: 2022-11-08 22:26:54 +0200 + + Docs: Update faq.txt a little. + + doc/faq.txt | 66 ++++++++++++++++++++++++++++++++++++++++--------------------- + 1 file changed, 43 insertions(+), 23 deletions(-) + +commit 3489565b75b45ee514cf0a6785bb721c7dc624f3 +Author: Lasse Collin +Date: 2022-11-08 14:13:03 +0200 + + liblzma: Update API docs about decoder flags. + + src/liblzma/api/lzma/container.h | 20 ++++++++++++++++---- + 1 file changed, 16 insertions(+), 4 deletions(-) + +commit e4937710805c4b660fc629c15d9a6164b2d97822 +Author: Lasse Collin +Date: 2022-11-08 14:00:58 +0200 + + liblzma: Fix a comment in auto_decoder.c. + + src/liblzma/common/auto_decoder.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +commit d4674dfbb7d1df1feb841f5dbce6ae1f0b026879 +Author: Jia Tan +Date: 2022-11-07 16:24:14 +0200 + + xz: Avoid a compiler warning in progress_speed() in message.c. + + This should be smaller too since it avoids the string constants. + + src/xz/message.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +commit 4ed56d32a91ff8ff21c71db4082a54a72ae08b3d +Author: Lasse Collin +Date: 2022-10-31 16:26:05 +0200 + + Build: Clarify comment in configure.ac about SSE2. + + configure.ac | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +commit f930638797f1fec05b272367c052d4fc9de35896 +Author: Lasse Collin +Date: 2022-10-31 16:16:37 +0200 + + Build: Remove obsolete commented-out lines from configure.ac. + + configure.ac | 4 ---- + 1 file changed, 4 deletions(-) + +commit 6930f14733757592a4f8d92513f642a5dcc9ec9c +Author: Lasse Collin +Date: 2022-10-31 13:31:58 +0200 + + Windows: Fix mythread_once() macro with Vista threads. + + Don't call InitOnceComplete() if initialization was already done. + + So far mythread_once() has been needed only when building + with --enable-small. windows/build.bash does this together + with --disable-threads so the Vista-specific mythread_once() + is never needed by those builds. VS project files or + CMake-builds don't support HAVE_SMALL builds at all. + + src/common/mythread.h | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +commit 1c8cbb5be3a97bf22e933777369c4cc28ef9ed5f +Author: Lasse Collin +Date: 2022-10-31 11:49:47 +0200 + + CMake: Sync tuklib_cpucores.cmake with tuklib_cpucores.m4. + + This was forgotten from commit 2611c4d90535652d3eb7ef4a026a6691276fab43. + + cmake/tuklib_cpucores.cmake | 5 +++++ + 1 file changed, 5 insertions(+) + +commit fa9efb729b1827b25ec71aefcf32f3aceeaf9597 +Author: Lasse Collin +Date: 2022-10-25 23:45:03 +0300 + + Build: Use AC_CONFIG_HEADERS instead of the ancient AC_CONFIG_HEADER. + + We require Autoconf >= 2.69 and that has AC_CONFIG_HEADERS. + + There is a warning about AC_PROG_CC_C99 being obsolete but + it cannot be removed because it is needed with Autoconf 2.69. + + configure.ac | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit b10ba4bf39edd9c9fef42cade3e64fc3b9e3fd19 +Author: Lasse Collin +Date: 2022-10-25 23:31:44 +0300 + + Build: Update m4/ax_pthread.m4 from Autoconf Archive. + + m4/ax_pthread.m4 | 29 ++++++++++++++++++++++------- + 1 file changed, 22 insertions(+), 7 deletions(-) + +commit 01744b280c6e35d331b9dc4cc64716b82bc8b3db +Author: Lasse Collin +Date: 2022-10-25 23:09:11 +0300 + + xz: Fix --single-stream with an empty .xz Stream. + + Example: + + $ xz -dc --single-stream good-0-empty.xz + xz: good-0-empty.xz: Internal error (bug) + + The code, that is tries to catch some input file issues early, + didn't anticipate LZMA_STREAM_END which is possible in that + code only when --single-stream is used. + + src/xz/coder.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +commit a3e4606134058241bf1c396243bebcec42c2a780 +Author: Lasse Collin +Date: 2022-10-25 19:07:17 +0300 + + xz: Fix decompressor behavior if input uses an unsupported check type. + + Now files with unsupported check will make xz display + a warning, set the exit status to 2 (unless --no-warn is used), + and then decompress the file normally. This is how it was + supposed to work since the beginning but this was broken by + the commit 231c3c7098f1099a56abb8afece76fc9b8699f05, that is, + a little before 5.0.0 was released. The buggy behavior displayed + a message, set exit status 1 (error), and xz didn't attempt to + to decompress the file. + + This doesn't matter today except for special builds that disable + CRC64 or SHA-256 at build time (but such builds should be used + in special situations only). The bug matters if new check type + is added in the future and an old xz version is used to decompress + such a file; however, it's likely that such files would use a new + filter too and an old xz wouldn't be able to decompress the file + anyway. + + The first hunk in the commit is the actual fix. The second hunk + is a cleanup since LZMA_TELL_ANY_CHECK isn't used in xz. + + There is a test file for unsupported check type but it wasn't + used by test_files.sh, perhaps due to different behavior between + xz and the simpler xzdec. + + src/xz/coder.c | 19 +++++++++++++++---- + 1 file changed, 15 insertions(+), 4 deletions(-) + +commit 0b5e8c7e073bce07c59112af1e8a7495e3576e6c +Author: Lasse Collin +Date: 2022-10-25 18:36:19 +0300 + + xz: Clarify the man page: input file isn't removed if an error occurs. + + src/xz/xz.1 | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +commit 23b7416d5ba0e3579265467bd546865d2ea4cef0 +Author: Lasse Collin +Date: 2022-10-25 18:23:54 +0300 + + xz: If input file cannot be removed, treat it as a warning, not error. + + Treating it as a warning (message + exit status 2) matches gzip + and it seems more logical as at that point the output file has + already been successfully closed. When it's a warning it is + possible to suppress it with --no-warn. + + src/xz/file_io.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +commit 5daa40454b63c4eb04ab5caeb339eddc95c94bb1 +Author: Lasse Collin +Date: 2022-10-20 20:22:50 +0300 + + tuklib_cpucores: Use HW_NCPUONLINE on OpenBSD. + + On OpenBSD the number of cores online is often less + than what HW_NCPU would return because OpenBSD disables + simultaneous multi-threading (SMT) by default. + + Thanks to Christian Weisgerber. + + m4/tuklib_cpucores.m4 | 5 +++++ + src/common/tuklib_cpucores.c | 9 +++++++++ + 2 files changed, 14 insertions(+) + +commit 0af861050f375678eafc4e1136ca589ae47d31c0 +Author: Lasse Collin +Date: 2022-11-11 13:25:02 +0200 + + NEWS: Omit the extra copy of 5.2.5 NEWS. + + It was a copy-paste error. + + NEWS | 105 ------------------------------------------------------------------- + 1 file changed, 105 deletions(-) + +commit f0c6a66701afed12c5926c0499b150ecd5b0a63c +Author: Lasse Collin +Date: 2022-11-10 12:34:43 +0200 + + Translations: Rename poa4/fr_FR.po to po4a/fr.po. + + That's how it is preferred at the Translation Project. + On my system /usr/share/man/fr_FR doesn't contain any + other man pages than XZ Utils while /usr/share/man/fr + has quite a few, so this will fix that too. + + Thanks to Benno Schulenberg from the Translation Project. + + po4a/{fr_FR.po => fr.po} | 0 + po4a/po4a.conf | 2 +- + 2 files changed, 1 insertion(+), 1 deletion(-) + +commit 6bf8b1f870870a1a0c3f555c34decc9ffb9f8d7a +Author: Lasse Collin +Date: 2022-11-08 16:57:17 +0200 + + Translations: Update Turkish translation. + + po/tr.po | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 9f8e9d3c8189d8c6f587c99b0da1cf83f72330c2 +Author: Lasse Collin +Date: 2022-11-08 14:55:32 +0200 + + Translations: Update Croatian translation. + + po/hr.po | 190 ++++++++++++++++++++++++++++++++------------------------------- + 1 file changed, 96 insertions(+), 94 deletions(-) + +commit d24a57b7fc7e5e9267b84367cb0788d3acf7f569 +Author: Lasse Collin +Date: 2022-09-30 16:41:03 +0300 + + Bump version and soname for 5.2.7. + + src/liblzma/Makefile.am | 2 +- + src/liblzma/api/lzma/version.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +commit d2003362dd42004355a5550ee7a60ace3b3ea2d8 +Author: Lasse Collin +Date: 2022-09-30 16:40:39 +0300 + + Add NEWS for 5.2.7. + + NEWS | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 74 insertions(+) + +commit 369afb51991e91a5c3a23c4ae3872329a403eb31 +Author: Lasse Collin +Date: 2022-09-30 12:06:13 +0300 + + liblzma: Add API doc note about the .xz decoder LZMA_MEMLIMIT_ERROR bug. + + The bug was fixed in 660739f99ab211edec4071de98889fb32ed04e98. + + src/liblzma/api/lzma/base.h | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +commit 166431e9951613481b8136f52b570d7bc30b5a03 +Author: Jia Tan +Date: 2022-09-21 16:15:50 +0800 + + liblzma: Add dest and src NULL checks to lzma_index_cat. + + The documentation states LZMA_PROG_ERROR can be returned from + lzma_index_cat. Previously, lzma_index_cat could not return + LZMA_PROG_ERROR. Now, the validation is similar to + lzma_index_append, which does a NULL check on the index + parameter. + + src/liblzma/common/index.c | 3 +++ + 1 file changed, 3 insertions(+) + +commit 5e53a6c28b87a0bb9b0cfeaf708cc99ef8e9bc66 +Author: Jia Tan +Date: 2022-09-21 20:29:28 +0800 + + Tests: Create a test for the lzma_index_cat bug. + + tests/test_index.c | 43 ++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 42 insertions(+), 1 deletion(-) + +commit 4ed5fd54c676b735e3f58158f214af68755d59b5 +Author: Jia Tan +Date: 2022-09-21 19:28:53 +0800 + + liblzma: Fix copying of check type statistics in lzma_index_cat(). + + The check type of the last Stream in dest was never copied to + dest->checks (the code tried to copy it but it was done too late). + This meant that the value returned by lzma_index_checks() would + only include the check type of the last Stream when multiple + lzma_indexes had been concatenated. + + In xz --list this meant that the summary would only list the + check type of the last Stream, so in this sense this was only + a visual bug. However, it's possible that some applications + use this information for purposes other than merely showing + it to the users in an informational message. I'm not aware of + such applications though and it's quite possible that such + applications don't exist. + + Regular streamed decompression in xz or any other application + doesn't use lzma_index_cat() and so this bug cannot affect them. + + src/liblzma/common/index.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +commit c4476f695291a2d7376d5fa406d32e354f858858 +Author: Lasse Collin +Date: 2022-09-28 12:20:41 +0300 + + tuklib_physmem: Fix Unicode builds on Windows. + + Thanks to ArSaCiA Game. + + src/common/tuklib_physmem.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 976f897bbba9890a99f01a8d59c6d59c230142e6 +Author: Lasse Collin +Date: 2022-09-28 11:05:15 +0300 + + liblzma: Stream decoder: Fix restarting after LZMA_MEMLIMIT_ERROR. + + If lzma_code() returns LZMA_MEMLIMIT_ERROR it is now possible + to use lzma_memlimit_set() to increase the limit and continue + decoding. This was supposed to work from the beginning but + there was a bug. With other decoders (.lzma or threaded .xz) + this already worked correctly. + + src/liblzma/common/stream_decoder.c | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +commit 2caa9580e5f7c5cc6668e9427a2c21f3f63bb684 +Author: Lasse Collin +Date: 2022-09-28 11:00:23 +0300 + + liblzma: Stream decoder: Fix comments. + + src/liblzma/common/stream_decoder.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +commit 51882fec5b96cf7760389b670d86c1e7a5a0ced2 +Author: Lasse Collin +Date: 2022-09-16 14:09:07 +0300 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit 974186f7cd839f1dbf937b98e112dff670d2bd5c +Author: Lasse Collin +Date: 2022-09-16 14:07:03 +0300 + + xzgrep: Fix compatibility with old shells. + + Running the current xzgrep on Slackware 10.1 with GNU bash 3.00.15: + + xzgrep: line 231: syntax error near unexpected token `;;' + + On SCO OpenServer 5.0.7 with Korn Shell 93r: + + syntax error at line 231 : `;;' unexpected + + Turns out that some old shells don't like apostrophes (') inside + command substitutions. For example, the following fails: + + x=$(echo foo + # asdf'zxcv + echo bar) + printf '%s\n' "$x" + + The problem was introduced by commits + 69d1b3fc29677af8ade8dc15dba83f0589cb63d6 (2022-03-29), + bd7b290f3fe4faeceb7d3497ed9bf2e6ed5e7dc5 (2022-07-18), and + a648978b20495b7aa4a8b029c5a810b5ad9d08ff (2022-07-19). + 5.2.6 is the only stable release that included + this problem. + + Thanks to Kevin R. Bulgrien for reporting the problem + on SCO OpenServer 5.0.7 and for providing the fix. + + src/scripts/xzgrep.in | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +commit f94da15120c3d3c363ca12c2262ac6cb9f321f4f +Author: Lasse Collin +Date: 2022-09-09 13:51:57 +0300 + + liblzma: lzma_filters_copy: Keep dest[] unmodified if an error occurs. + + lzma_stream_encoder() and lzma_stream_encoder_mt() always assumed + this. Before this patch, failing lzma_filters_copy() could result + in free(invalid_pointer) or invalid memory reads in stream_encoder.c + or stream_encoder_mt.c. + + To trigger this, allocating memory for a filter options structure + has to fail. These are tiny allocations so in practice they very + rarely fail. + + Certain badness in the filter chain array could also make + lzma_filters_copy() fail but both stream_encoder.c and + stream_encoder_mt.c validate the filter chain before + trying to copy it, so the crash cannot occur this way. + + src/liblzma/api/lzma/filter.h | 4 +++- + src/liblzma/common/filter_common.c | 18 ++++++++++++------ + 2 files changed, 15 insertions(+), 7 deletions(-) + +commit ea57b9aa2c3e1cdb667f8dd698314b1c36047018 +Author: Lasse Collin +Date: 2022-09-16 17:08:53 +0300 + + Tests: Add a test file for lzma_index_append() integer overflow bug. + + This test fails before commit 18d7facd3802b55c287581405c4d49c98708c136. + + test_files.sh now runs xz -l for bad-3-index-uncomp-overflow.xz + because only then the previously-buggy code path gets tested. + Normal decompression doesn't use lzma_index_append() at all. + Instead, lzma_index_hash functions are used and those already + did the overflow check. + + tests/files/README | 10 ++++++++++ + tests/files/bad-3-index-uncomp-overflow.xz | Bin 0 -> 132 bytes + tests/test_files.sh | 8 ++++++++ + 3 files changed, 18 insertions(+) + +commit 72e1645a439a999638a63ec9abb7210671ea5415 +Author: Jia Tan +Date: 2022-09-02 20:18:55 +0800 + + liblzma: lzma_index_append: Add missing integer overflow check. + + The documentation in src/liblzma/api/lzma/index.h suggests that + both the unpadded (compressed) size and the uncompressed size + are checked for overflow, but only the unpadded size was checked. + The uncompressed check is done first since that is more likely to + occur than the unpadded or index field size overflows. + + src/liblzma/common/index.c | 4 ++++ + 1 file changed, 4 insertions(+) + +commit 20d82bc90781aa9d2373efe872d90ba64dc7dc62 +Author: Lasse Collin +Date: 2022-09-08 15:11:08 +0300 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit 31d80c6b261b24220776dfaeb8a04f80f80e0a24 +Author: Lasse Collin +Date: 2022-09-04 23:23:00 +0300 + + liblzma: Vaccinate against an ill patch from RHEL/CentOS 7. + + RHEL/CentOS 7 shipped with 5.1.2alpha, including the threaded + encoder that is behind #ifdef LZMA_UNSTABLE in the API headers. + In 5.1.2alpha these symbols are under XZ_5.1.2alpha in liblzma.map. + API/ABI compatibility tracking isn't done between development + releases so newer releases didn't have XZ_5.1.2alpha anymore. + + Later RHEL/CentOS 7 updated xz to 5.2.2 but they wanted to keep + the exported symbols compatible with 5.1.2alpha. After checking + the ABI changes it turned out that >= 5.2.0 ABI is backward + compatible with the threaded encoder functions from 5.1.2alpha + (but not vice versa as fixes and extensions to these functions + were made between 5.1.2alpha and 5.2.0). + + In RHEL/CentOS 7, XZ Utils 5.2.2 was patched with + xz-5.2.2-compat-libs.patch to modify liblzma.map: + + - XZ_5.1.2alpha was added with lzma_stream_encoder_mt and + lzma_stream_encoder_mt_memusage. This matched XZ Utils 5.1.2alpha. + + - XZ_5.2 was replaced with XZ_5.2.2. It is clear that this was + an error; the intention was to keep using XZ_5.2 (XZ_5.2.2 + has never been used in XZ Utils). So XZ_5.2.2 lists all + symbols that were listed under XZ_5.2 before the patch. + lzma_stream_encoder_mt and _mt_memusage are included too so + they are listed both here and under XZ_5.1.2alpha. + + The patch didn't add any __asm__(".symver ...") lines to the .c + files. Thus the resulting liblzma.so exports the threaded encoder + functions under XZ_5.1.2alpha only. Listing the two functions + also under XZ_5.2.2 in liblzma.map has no effect without + matching .symver lines. + + The lack of XZ_5.2 in RHEL/CentOS 7 means that binaries linked + against unpatched XZ Utils 5.2.x won't run on RHEL/CentOS 7. + This is unfortunate but this alone isn't too bad as the problem + is contained within RHEL/CentOS 7 and doesn't affect users + of other distributions. It could also be fixed internally in + RHEL/CentOS 7. + + The second problem is more serious: In XZ Utils 5.2.2 the API + headers don't have #ifdef LZMA_UNSTABLE for obvious reasons. + This is true in RHEL/CentOS 7 version too. Thus now programs + using new APIs can be compiled without an extra #define. However, + the programs end up depending on symbol version XZ_5.1.2alpha + (and possibly also XZ_5.2.2) instead of XZ_5.2 as they would + with an unpatched XZ Utils 5.2.2. This means that such binaries + won't run on other distributions shipping XZ Utils >= 5.2.0 as + they don't provide XZ_5.1.2alpha or XZ_5.2.2; they only provide + XZ_5.2 (and XZ_5.0). (This includes RHEL/CentOS 8 as the patch + luckily isn't included there anymore with XZ Utils 5.2.4.) + + Binaries built by RHEL/CentOS 7 users get distributed and then + people wonder why they don't run on some other distribution. + Seems that people have found out about the patch and been copying + it to some build scripts, seemingly curing the symptoms but + actually spreading the illness further and outside RHEL/CentOS 7. + + The ill patch seems to be from late 2016 (RHEL 7.3) and in 2017 it + had spread at least to EasyBuild. I heard about the events only + recently. :-( + + This commit splits liblzma.map into two versions: one for + GNU/Linux and another for other OSes that can use symbol versioning + (FreeBSD, Solaris, maybe others). The Linux-specific file and the + matching additions to .c files add full compatibility with binaries + that have been built against a RHEL/CentOS-patched liblzma. Builds + for OSes other than GNU/Linux won't get the vaccine as they should + be immune to the problem (I really hope that no build script uses + the RHEL/CentOS 7 patch outside GNU/Linux). + + The RHEL/CentOS compatibility symbols XZ_5.1.2alpha and XZ_5.2.2 + are intentionally put *after* XZ_5.2 in liblzma_linux.map. This way + if one forgets to #define HAVE_SYMBOL_VERSIONS_LINUX when building, + the resulting liblzma.so.5 will have lzma_stream_encoder_mt@@XZ_5.2 + since XZ_5.2 {...} is the first one that lists that function. + Without HAVE_SYMBOL_VERSIONS_LINUX @XZ_5.1.2alpha and @XZ_5.2.2 + will be missing but that's still a minor problem compared to + only having lzma_stream_encoder_mt@@XZ_5.1.2alpha! + + The "local: *;" line was moved to XZ_5.0 so that it doesn't need + to be moved around. It doesn't matter where it is put. + + Having two similar liblzma_*.map files is a bit silly as it is, + at least for now, easily possible to generate the generic one + from the Linux-specific file. But that adds extra steps and + increases the risk of mistakes when supporting more than one + build system. So I rather maintain two files in parallel and let + validate_map.sh check that they are in sync when "make mydist" + is run. + + This adds .symver lines for lzma_stream_encoder_mt@XZ_5.2.2 and + lzma_stream_encoder_mt_memusage@XZ_5.2.2 even though these + weren't exported by RHEL/CentOS 7 (only @@XZ_5.1.2alpha was + for these two). I added these anyway because someone might + misunderstand the RHEL/CentOS 7 patch and think that @XZ_5.2.2 + (@@XZ_5.2.2) versions were exported too. + + At glance one could suggest using __typeof__ to copy the function + prototypes when making aliases. However, this doesn't work trivially + because __typeof__ won't copy attributes (lzma_nothrow, lzma_pure) + and it won't change symbol visibility from hidden to default (done + by LZMA_API()). Attributes could be copied with __copy__ attribute + but that needs GCC 9 and a fallback method would be needed anyway. + + This uses __symver__ attribute with GCC >= 10 and + __asm__(".symver ...") with everything else. The attribute method + is required for LTO (-flto) support with GCC. Using -flto with + GCC older than 10 is now broken on GNU/Linux and will not be fixed + (can silently result in a broken liblzma build that has dangerously + incorrect symbol versions). LTO builds with Clang seem to work + with the traditional __asm__(".symver ...") method. + + Thanks to Boud Roukema for reporting the problem and discussing + the details and testing the fix. + + configure.ac | 23 ++++- + src/liblzma/Makefile.am | 10 +- + src/liblzma/common/block_buffer_encoder.c | 18 ++++ + src/liblzma/common/common.c | 14 +++ + src/liblzma/common/common.h | 28 ++++++ + src/liblzma/common/hardware_cputhreads.c | 12 +++ + src/liblzma/common/stream_encoder_mt.c | 42 ++++++++ + src/liblzma/{liblzma.map => liblzma_generic.map} | 6 +- + src/liblzma/liblzma_linux.map | 123 +++++++++++++++++++++++ + src/liblzma/validate_map.sh | 113 +++++++++++++++++++-- + 10 files changed, 374 insertions(+), 15 deletions(-) + +commit e7a7ac744eb0f890ef52388de838596ef566c73f +Author: Jia Tan +Date: 2022-09-08 15:07:00 +0300 + + CMake: Clarify a comment about Windows symlinks without file extension. + + CMakeLists.txt | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +commit a273a0cb77c6000e4da627b07f28c5af41f0908c +Author: Lasse Collin +Date: 2022-09-08 15:02:41 +0300 + + CMake: Update for liblzma_*.map files and fix wrong common_w32res.rc dep. + + The previous commit split liblzma.map into liblzma_linux.map and + liblzma_generic.map. This commit updates the CMake build for those. + + common_w32res.rc dependency was listed under Linux/FreeBSD while + obviously it belongs to Windows when building a DLL. + + CMakeLists.txt | 24 +++++++++++++++++++----- + 1 file changed, 19 insertions(+), 5 deletions(-) + +commit 5875a45be0ab399e37acc31165725a9b5291f67e +Author: Lasse Collin +Date: 2022-08-31 16:42:04 +0300 + + CMake: Add xz symlinks. + + These are a minor thing especially since the xz build has + some real problems still like lack of large file support + on 32-bit systems but I'll commit this since the code exists. + + Thanks to Jia Tan. + + CMakeLists.txt | 38 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 37 insertions(+), 1 deletion(-) + +commit 3523b6ebb56c447f21736f01bfe91602741aa3e7 +Author: Lasse Collin +Date: 2022-08-31 16:29:38 +0300 + + CMake: Put xz man page install under if(UNIX) like is for xzdec. + + Thanks to Jia Tan. + + CMakeLists.txt | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +commit 5af9e8759f748a11a408bfc5aef829df3811fc23 +Author: Lasse Collin +Date: 2022-09-16 15:10:07 +0300 + + Translations: Add Turkish translation. + + po/LINGUAS | 1 + + po/tr.po | 977 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 978 insertions(+) + +commit f05a69685e9c7f7157d751abfbd2d8ea6e3bacc7 +Author: Lasse Collin +Date: 2022-08-18 17:49:16 +0300 + + Build: Include the CMake files in the distribution. + + This was supposed to be done in 2020 with 5.2.5 release + already but it was noticed only today. 5.2.5 and 5.2.6 + even mention experiemental CMake support in the NEWS entries. + + Thanks to Olivier B. for reporting the problem. + + Makefile.am | 2 ++ + 1 file changed, 2 insertions(+) + +commit ad5ef6d3c30bb749113c51aa0bbe8bcb5e193fbe +Author: Lasse Collin +Date: 2022-08-18 17:38:05 +0300 + + Windows: Fix broken liblzma.dll build with Visual Studio project files. + + The bug was introduced in 352ba2d69af2136bc814aa1df1a132559d445616 + "Windows: Fix building of resource files when config.h isn't used." + + That commit fixed liblzma.dll build with CMake while keeping it + working with Autotools on Windows but the VS project files were + forgotten. + + I haven't tested these changes. + + Thanks to Olivier B. for reporting the bug and for the initial patch. + + windows/vs2013/liblzma_dll.vcxproj | 6 ++++++ + windows/vs2017/liblzma_dll.vcxproj | 6 ++++++ + windows/vs2019/liblzma_dll.vcxproj | 6 ++++++ + 3 files changed, 18 insertions(+) + +commit 8dfed05bdaa4873833ba24279f02ad2db25effea +Author: Lasse Collin +Date: 2022-08-12 14:30:13 +0300 + + Bump version and soname for 5.2.6. + + src/liblzma/Makefile.am | 2 +- + src/liblzma/api/lzma/version.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +commit 09b4af4e04def5777b06124200b64b0b941eea7f +Author: Lasse Collin +Date: 2022-08-12 14:29:28 +0300 + + Add NEWS for 5.2.6. + + NEWS | 226 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 226 insertions(+) + +commit 692de534fae6c38f92416793addcf5f60ffe2bf6 +Author: Lasse Collin +Date: 2022-08-12 14:28:41 +0300 + + Add Jia Tan to AUTHORS. + + AUTHORS | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 275de376a645f406be7c453bb68427aa9466cf83 +Author: Lasse Collin +Date: 2022-07-25 19:11:05 +0300 + + Translations: Change the copyright comment string to use with po4a. + + This affects the second line in po4a/xz-man.pot. The man pages of + xzdiff, xzgrep, and xzmore are from GNU gzip and under GNU GPLv2+ + while the rest of the man pages are in the public domain. + + po4a/update-po | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 76a5a752b8467ff591dd028deb61e9bf2c274c7e +Author: Jia Tan +Date: 2022-07-25 18:30:05 +0300 + + liblzma: Refactor lzma_mf_is_supported() to use a switch-statement. + + src/liblzma/lz/lz_encoder.c | 32 ++++++++++++++------------------ + 1 file changed, 14 insertions(+), 18 deletions(-) + +commit 749b86c2c18ab61a07f19ec8fefc67325da97397 +Author: Jia Tan +Date: 2022-07-25 18:20:01 +0300 + + Build: Don't allow empty LIST in --enable-match-finders=LIST. + + It's enforced only when a match finder is needed, that is, + when LZMA1 or LZMA2 encoder is enabled. + + configure.ac | 4 ++++ + 1 file changed, 4 insertions(+) + +commit 63e3cdef805a4d9ea94c76c36cbabe4dc76abe36 +Author: Lasse Collin +Date: 2021-01-11 23:41:16 +0200 + + xz: Make --keep accept symlinks, hardlinks, and setuid/setgid/sticky. + + Previously this required using --force but that has other + effects too which might be undesirable. Changing the behavior + of --keep has a small risk of breaking existing scripts but + since this is a fairly special corner case I expect the + likehood of breakage to be low enough. + + I think the new behavior is more logical. The only reason for + the old behavior was to be consistent with gzip and bzip2. + + Thanks to Vincent Lefevre and Sebastian Andrzej Siewior. + + src/xz/file_io.c | 9 +++++---- + src/xz/xz.1 | 16 +++++++++++++++- + 2 files changed, 20 insertions(+), 5 deletions(-) + +commit 9055584be0ae47032b7fb3ba8531aa56b9ed572a +Author: Lasse Collin +Date: 2022-07-19 23:19:49 +0300 + + xzgrep man page: Document exit statuses. + + src/scripts/xzgrep.1 | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +commit 57e1ccbb7ce30e5298ea7ec3934e462fa8d32237 +Author: Lasse Collin +Date: 2022-07-19 23:13:24 +0300 + + xzgrep: Improve error handling, especially signals. + + xzgrep wouldn't exit on SIGPIPE or SIGQUIT when it clearly + should have. It's quite possible that it's not perfect still + but at least it's much better. + + If multiple exit statuses compete, now it tries to pick + the largest of value. + + Some comments were added. + + The exit status handling of signals is still broken if the shell + uses values larger than 255 in $? to indicate that a process + died due to a signal ***and*** their "exit" command doesn't take + this into account. This seems to work well with the ksh and yash + versions I tried. However, there is a report in gzip/zgrep that + OpenSolaris 5.11 (not 5.10) has a problem with "exit" truncating + the argument to 8 bits: + + https://debbugs.gnu.org/cgi/bugreport.cgi?bug=22900#25 + + Such a bug would break xzgrep but I didn't add a workaround + at least for now. 5.11 is old and I don't know if the problem + exists in modern descendants, or if the problem exists in other + ksh implementations in use. + + src/scripts/xzgrep.in | 72 +++++++++++++++++++++++++++++++++++++-------------- + 1 file changed, 53 insertions(+), 19 deletions(-) + +commit 6351ea1afb261ec9be95f808bcbe724e0da4ae09 +Author: Lasse Collin +Date: 2022-07-19 00:10:55 +0300 + + xzgrep: Make the fix for ZDI-CAN-16587 more robust. + + I don't know if this can make a difference in the real world + but it looked kind of suspicious (what happens with sed + implementations that cannot process very long lines?). + At least this commit shouldn't make it worse. + + src/scripts/xzgrep.in | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +commit 2c1ff2ed6b9f17b3913866a15d776ebbbeb824d2 +Author: Lasse Collin +Date: 2022-07-18 21:52:31 +0300 + + xzgrep: Use grep -H --label when available (GNU, *BSDs). + + It avoids the use of sed for prefixing filenames to output lines. + Using sed for that is slower and prone to security bugs so now + the sed method is only used as a fallback. + + This also fixes an actual bug: When grepping a binary file, + GNU grep nowadays prints its diagnostics to stderr instead of + stdout and thus the sed-method for prefixing the filename doesn't + work. So with this commit grepping binary files gives reasonable + output with GNU grep now. + + This was inspired by zgrep but the implementation is different. + + src/scripts/xzgrep.in | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +commit 8b0be38a795fa1cdffdada64341a34209e630060 +Author: Lasse Collin +Date: 2022-07-18 21:10:25 +0300 + + xzgrep: Use -e to specify the pattern to grep. + + Now we don't need the separate test for adding the -q option + as it can be added directly in the two places where it's needed. + + src/scripts/xzgrep.in | 12 ++++-------- + 1 file changed, 4 insertions(+), 8 deletions(-) + +commit 4a61867a874c60b31448d2d4f3c276efba85a243 +Author: Lasse Collin +Date: 2022-07-18 19:18:48 +0300 + + Scripts: Use printf instead of echo in a few places. + + It's a good habbit as echo has some portability corner cases + when the string contents can be anything. + + src/scripts/xzdiff.in | 6 +++--- + src/scripts/xzgrep.in | 4 ++-- + src/scripts/xzless.in | 4 ++-- + src/scripts/xzmore.in | 8 ++++---- + 4 files changed, 11 insertions(+), 11 deletions(-) + +commit 0e222bf7d79a9792025c5ffeaa66c8e14eb47615 +Author: Lasse Collin +Date: 2022-07-17 21:36:25 +0300 + + xzgrep: Add more LC_ALL=C to avoid bugs with multibyte characters. + + Also replace one use of expr with printf. + + The rationale for LC_ALL=C was already mentioned in + 69d1b3fc29677af8ade8dc15dba83f0589cb63d6 that fixed a security + issue. However, unrelated uses weren't changed in that commit yet. + + POSIX says that with sed and such tools one should use LC_ALL=C + to ensure predictable behavior when strings contain byte sequences + that aren't valid multibyte characters in the current locale. See + under "Application usage" in here: + + https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html + + With GNU sed invalid multibyte strings would work without this; + it's documented in its Texinfo manual. Some other implementations + aren't so forgiving. + + src/scripts/xzgrep.in | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +commit 62c1d2bc2da66e182c52bb1e3aa791f6d8a9e099 +Author: Lasse Collin +Date: 2022-07-17 20:55:16 +0300 + + xzgrep: Fix parsing of certain options. + + Fix handling of "xzgrep -25 foo" (in GNU grep "grep -25 foo" is + an alias for "grep -C25 foo"). xzgrep would treat "foo" as filename + instead of as a pattern. This bug was fixed in zgrep in gzip in 2012. + + Add -E, -F, -G, and -P to the "no argument required" list. + + Add -X to "argument required" list. It is an + intentionally-undocumented GNU grep option so this isn't + an important option for xzgrep but it seems that other grep + implementations (well, those that I checked) don't support -X + so I hope this change is an improvement still. + + grep -d (grep --directories=ACTION) requires an argument. In + contrast to zgrep, I kept -d in the "no argument required" list + because it's not supported in xzgrep (or zgrep). This way + "xzgrep -d" gives an error about option being unsupported instead + of telling that it requires an argument. Both zgrep and xzgrep + tell that it's unsupported if an argument is specified. + + Add comments. + + src/scripts/xzgrep.in | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +commit 372a0d12c95fad5e5477b4f79646bf66d28e0c31 +Author: Lasse Collin +Date: 2022-07-14 20:33:05 +0300 + + Tests: Add the .lzma files to test_files.sh. + + tests/test_files.sh | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +commit b8e3d0c45b62c81dfc6b350556d77cf2c7401edd +Author: Lasse Collin +Date: 2022-07-14 19:37:42 +0300 + + Tests: Add .lzma test files. + + tests/files/README | 63 ++++++++++++++++----- + tests/files/bad-too_big_size-with_eopm.lzma | Bin 0 -> 37 bytes + tests/files/bad-too_small_size-without_eopm-1.lzma | Bin 0 -> 31 bytes + tests/files/bad-too_small_size-without_eopm-2.lzma | Bin 0 -> 31 bytes + tests/files/bad-too_small_size-without_eopm-3.lzma | Bin 0 -> 36 bytes + tests/files/bad-unknown_size-without_eopm.lzma | Bin 0 -> 31 bytes + tests/files/good-known_size-with_eopm.lzma | Bin 0 -> 37 bytes + tests/files/good-known_size-without_eopm.lzma | Bin 0 -> 31 bytes + tests/files/good-unknown_size-with_eopm.lzma | Bin 0 -> 37 bytes + 9 files changed, 50 insertions(+), 13 deletions(-) + +commit e96bdf7189916bffec0dcc933cd809fac898587c +Author: Lasse Collin +Date: 2022-07-14 18:12:38 +0300 + + liblzma: Rename a variable and improve a comment. + + src/liblzma/lzma/lzma_decoder.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +commit 2d54fdf58e80bb2b0852633838576fd1ae7961c5 +Author: Lasse Collin +Date: 2022-07-13 22:24:41 +0300 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit ff54b557fe3b06b59a24e4028c1464c35dd5b142 +Author: Lasse Collin +Date: 2022-07-13 22:24:07 +0300 + + liblzma: Add optional autodetection of LZMA end marker. + + Turns out that this is needed for .lzma files as the spec in + LZMA SDK says that end marker may be present even if the size + is stored in the header. Such files are rare but exist in the + real world. The code in liblzma is so old that the spec didn't + exist in LZMA SDK back then and I had understood that such + files weren't possible (the lzma tool in LZMA SDK didn't + create such files). + + This modifies the internal API so that LZMA decoder can be told + if EOPM is allowed even when the uncompressed size is known. + It's allowed with .lzma and not with other uses. + + Thanks to Karl Beldan for reporting the problem. + + doc/lzma-file-format.txt | 11 ++++- + src/liblzma/common/alone_decoder.c | 2 +- + src/liblzma/lz/lz_decoder.c | 10 +++- + src/liblzma/lz/lz_decoder.h | 8 +-- + src/liblzma/lzma/lzma2_decoder.c | 2 +- + src/liblzma/lzma/lzma_decoder.c | 99 +++++++++++++++++++++++++++----------- + 6 files changed, 94 insertions(+), 38 deletions(-) + +commit bb795fe835cf2b3ae7ed68b155784fbfc14e2faf +Author: Lasse Collin +Date: 2022-06-14 17:20:49 +0300 + + Tests: Add test file good-1-empty-bcj-lzma2.xz. + + This is from test_bcj_exact_size.c. + It's good to have it as a standalone file. + + tests/files/README | 5 +++++ + tests/files/good-1-empty-bcj-lzma2.xz | Bin 0 -> 52 bytes + 2 files changed, 5 insertions(+) + +commit dbd8b0bf45c10f991635f585a2ea59fb5ae28ff3 +Author: Lasse Collin +Date: 2022-03-31 00:05:07 +0300 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit bb66a98ded39c40df202da17cb2055356dcd4168 +Author: Lasse Collin +Date: 2022-03-29 19:19:12 +0300 + + xzgrep: Fix escaping of malicious filenames (ZDI-CAN-16587). + + Malicious filenames can make xzgrep to write to arbitrary files + or (with a GNU sed extension) lead to arbitrary code execution. + + xzgrep from XZ Utils versions up to and including 5.2.5 are + affected. 5.3.1alpha and 5.3.2alpha are affected as well. + This patch works for all of them. + + This bug was inherited from gzip's zgrep. gzip 1.12 includes + a fix for zgrep. + + The issue with the old sed script is that with multiple newlines, + the N-command will read the second line of input, then the + s-commands will be skipped because it's not the end of the + file yet, then a new sed cycle starts and the pattern space + is printed and emptied. So only the last line or two get escaped. + + One way to fix this would be to read all lines into the pattern + space first. However, the included fix is even simpler: All lines + except the last line get a backslash appended at the end. To ensure + that shell command substitution doesn't eat a possible trailing + newline, a colon is appended to the filename before escaping. + The colon is later used to separate the filename from the grep + output so it is fine to add it here instead of a few lines later. + + The old code also wasn't POSIX compliant as it used \n in the + replacement section of the s-command. Using \ is the + POSIX compatible method. + + LC_ALL=C was added to the two critical sed commands. POSIX sed + manual recommends it when using sed to manipulate pathnames + because in other locales invalid multibyte sequences might + cause issues with some sed implementations. In case of GNU sed, + these particular sed scripts wouldn't have such problems but some + other scripts could have, see: + + info '(sed)Locale Considerations' + + This vulnerability was discovered by: + cleemy desu wayo working with Trend Micro Zero Day Initiative + + Thanks to Jim Meyering and Paul Eggert discussing the different + ways to fix this and for coordinating the patch release schedule + with gzip. + + src/scripts/xzgrep.in | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +commit fa3af4e4c6a8f692785b87ba0f165f34d154ae0f +Author: Lasse Collin +Date: 2022-03-23 16:34:00 +0200 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit f12ce0f23a0496daebc77e8359309bf724ad5e8a +Author: Lasse Collin +Date: 2022-03-06 16:54:23 +0200 + + liblzma: Fix docs: lzma_block_decoder() cannot return LZMA_UNSUPPORTED_CHECK. + + If Check is unsupported, it will be silently ignored. + It's the caller's job to handle it. + + src/liblzma/api/lzma/block.h | 3 --- + 1 file changed, 3 deletions(-) + +commit 412566731111edc28f0b68bc966d01ab24d17082 +Author: Lasse Collin +Date: 2022-03-06 15:18:58 +0200 + + liblzma: Index hash: Change return value type of hash_append() to void. + + src/liblzma/common/index_hash.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +commit 7c3ce02df001701a26c2fa41b2dc8b530743f454 +Author: Lasse Collin +Date: 2022-02-22 03:42:57 +0200 + + liblzma: Minor addition to lzma_vli_size() API doc. + + Thanks to Jia Tan. + + src/liblzma/api/lzma/vli.h | 2 ++ + 1 file changed, 2 insertions(+) + +commit b8f667fe0c3e17fd2c559901d2aaf19b1dfd51f2 +Author: Lasse Collin +Date: 2022-02-22 02:04:18 +0200 + + liblzma: Check the return value of lzma_index_append() in threaded encoder. + + If lzma_index_append() failed (most likely memory allocation failure) + it could have gone unnoticed and the resulting .xz file would have + an incorrect Index. Decompressing such a file would produce the + correct uncompressed data but then an error would occur when + verifying the Index field. + + src/liblzma/common/stream_encoder_mt.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +commit 2356d53edd8f44ff8f411972ca62d63079ff59af +Author: Lasse Collin +Date: 2022-02-22 01:37:39 +0200 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit 748ef0833821f1ea4ff5d57abc3cab1ae76266d3 +Author: Ed Maste +Date: 2022-02-11 15:25:46 +0000 + + liblzma: Use non-executable stack on FreeBSD as on Linux + + src/liblzma/check/crc32_x86.S | 4 ++-- + src/liblzma/check/crc64_x86.S | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +commit 068a6e3286c9ca7d67f13f4c78646c4f4a3c3c5b +Author: Lasse Collin +Date: 2022-02-20 20:36:27 +0200 + + liblzma: Make Block decoder catch certain types of errors better. + + Now it limits the input and output buffer sizes that are + passed to a raw decoder. This way there's no need to check + if the sizes can grow too big or overflow when updating + Compressed Size and Uncompressed Size counts. This also means + that a corrupt file cannot cause the raw decoder to process + useless extra input or output that would exceed the size info + in Block Header (and thus cause LZMA_DATA_ERROR anyway). + + More importantly, now the size information is verified more + carefully in case raw decoder returns LZMA_OK. This doesn't + really matter with the current single-threaded .xz decoder + as the errors would be detected slightly later anyway. But + this helps avoiding corner cases in the upcoming threaded + decompressor, and it might help other Block decoder uses + outside liblzma too. + + The test files bad-1-lzma2-{9,10,11}.xz test these conditions. + With the single-threaded .xz decoder the only difference is + that LZMA_DATA_ERROR is detected in a difference place now. + + src/liblzma/common/block_decoder.c | 79 ++++++++++++++++++++++++++------------ + 1 file changed, 54 insertions(+), 25 deletions(-) + +commit 766df4f62c165ba96d60af8fadbada51349fd140 +Author: Lasse Collin +Date: 2022-02-20 19:38:55 +0200 + + Tests: Add bad-1-lzma2-11.xz. + + tests/files/README | 5 +++++ + tests/files/bad-1-lzma2-11.xz | Bin 0 -> 64 bytes + 2 files changed, 5 insertions(+) + +commit 12a6d6ce2a9850e32140e542decfd9c8a33c87fc +Author: Lasse Collin +Date: 2022-02-18 18:51:10 +0200 + + Translations: Fix po4a failure with the French man page translations. + + Thanks to Mario Blättermann for the patch. + + po4a/fr_FR.po | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +commit 00e6aad836cf301e95575ea53790f5ab96fe8ed6 +Author: Lasse Collin +Date: 2022-02-07 01:14:37 +0200 + + Translations: Add French translation of man pages. + + This matches xz-utils 5.2.5-2 in Debian. + + The translation was done by "bubu", proofread by the debian-l10n-french + mailing list contributors, and submitted to me on the xz-devel mailing + list by Jean-Pierre Giraud. Thanks to everyone! + + po4a/fr_FR.po | 3541 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + po4a/po4a.conf | 2 +- + 2 files changed, 3542 insertions(+), 1 deletion(-) + +commit e20ce2b12251a246c50fb5b7fa2204c11611b407 +Author: jiat75 +Date: 2022-01-28 20:47:55 +0800 + + liblzma: Add NULL checks to LZMA and LZMA2 properties encoders. + + Previously lzma_lzma_props_encode() and lzma_lzma2_props_encode() + assumed that the options pointers must be non-NULL because the + with these filters the API says it must never be NULL. It is + good to do these checks anyway. + + src/liblzma/lzma/lzma2_encoder.c | 3 +++ + src/liblzma/lzma/lzma_encoder.c | 3 +++ + 2 files changed, 6 insertions(+) + +commit feb80ace866c3033cefbc39f6b07ada83e316e6a +Author: huangqinjin +Date: 2021-12-13 20:49:21 +0800 + + CMake: Keep compatible with Windows 95 for 32-bit build. + + CMakeLists.txt | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +commit 725f2e0522c4ef8dc5fdd69b5017bac6322ebce2 +Author: Lasse Collin +Date: 2021-11-13 21:04:05 +0200 + + xzgrep: Update man page timestamp. + + src/scripts/xzgrep.1 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 7955669d423724e69a7f2dae1173dae248f7a596 +Author: Lasse Collin +Date: 2021-11-13 18:23:24 +0200 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit 671673a7a209e6131491e6a424758bbc13fb7aa1 +Author: Ville Skyttä +Date: 2021-11-13 10:11:57 +0200 + + xzgrep: use `grep -E/-F` instead of `egrep` and `fgrep` + + `egrep` and `fgrep` have been deprecated in GNU grep since 2007, and in + current post 3.7 Git they have been made to emit obsolescence warnings: + https://git.savannah.gnu.org/cgit/grep.git/commit/?id=a9515624709865d480e3142fd959bccd1c9372d1 + + src/scripts/xzgrep.1 | 8 ++++---- + src/scripts/xzgrep.in | 4 ++-- + 2 files changed, 6 insertions(+), 6 deletions(-) + +commit 45e538257e85050f67a6d363961e31b71e3ec565 +Author: Lasse Collin +Date: 2021-10-27 23:27:48 +0300 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit ca21733d24a4f65b46a5efa124a89c859ac5592e +Author: Lasse Collin +Date: 2021-10-27 23:23:11 +0300 + + xz: Change the coding style of the previous commit. + + It isn't any better now but it's consistent with + the rest of the code base. + + src/xz/file_io.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +commit 906b990b151b93750958371e39ad00e8d1daada9 +Author: Alexander Bluhm +Date: 2021-10-05 23:33:16 +0200 + + xz: Avoid fchown(2) failure. + + OpenBSD does not allow to change the group of a file if the user + does not belong to this group. In contrast to Linux, OpenBSD also + fails if the new group is the same as the old one. Do not call + fchown(2) in this case, it would change nothing anyway. + + This fixes an issue with Perl Alien::Build module. + https://github.com/PerlAlien/Alien-Build/issues/62 + + src/xz/file_io.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +commit ca83df96c48371ce0094c73c6f4582899ae4b62b +Author: Lasse Collin +Date: 2021-09-09 22:21:07 +0300 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit d8b294af03840c7c47d6b7b0d4c0d6d2f6357dfa +Author: Lasse Collin +Date: 2021-09-09 21:41:51 +0300 + + liblzma: Use _MSVC_LANG to detect when "noexcept" can be used with MSVC. + + By default, MSVC always sets __cplusplus to 199711L. The real + C++ standard version is available in _MSVC_LANG (or one could + use /Zc:__cplusplus to set __cplusplus correctly). + + Fixes . + + Thanks to Dan Weiss. + + src/liblzma/api/lzma.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +commit c2fde22befe1f5256ad3d7c4cb35794aa8e64c67 +Author: Lasse Collin +Date: 2021-06-04 19:02:38 +0300 + + xzdiff: Update the man page about the exit status. + + This was forgotten from 194029ffaf74282a81f0c299c07f73caca3232ca. + + src/scripts/xzdiff.1 | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +commit 8d0fd42fbedd01fd3c847b56d19841e603500dea +Author: Lasse Collin +Date: 2021-06-04 18:52:48 +0300 + + xzless: Fix less(1) version detection when it contains a dot. + + Sometimes the version number from "less -V" contains a dot, + sometimes not. xzless failed detect the version number when + it does contain a dot. This fixes it. + + Thanks to nick87720z for reporting this. Apparently it had been + reported here in 2013. + + src/scripts/xzless.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 6e2cab8579fc6f832b5954510b27fd7e74563436 +Author: Lasse Collin +Date: 2022-07-12 18:53:04 +0300 + + xz: Document the special memlimit case of 2000 MiB on MIPS32. + + See commit 95806a8a52ae57bddf6c77dfd19cf7938a92e040. + + src/xz/xz.1 | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +commit 38b311462bd8974888ba3cd9312de132c76ec2e0 +Author: Lasse Collin +Date: 2021-04-11 19:58:10 +0300 + + Update THANKS. + + THANKS | 2 ++ + 1 file changed, 2 insertions(+) + +commit 95806a8a52ae57bddf6c77dfd19cf7938a92e040 +Author: Ivan A. Melnikov +Date: 2021-04-09 11:45:10 +0300 + + Reduce maximum possible memory limit on MIPS32 + + Due to architectural limitations, address space available to a single + userspace process on MIPS32 is limited to 2 GiB, not 4, even on systems + that have more physical RAM -- e.g. 64-bit systems with 32-bit + userspace, or systems that use XPA (an extension similar to x86's PAE). + + So, for MIPS32, we have to impose stronger memory limits. I've chosen + 2000MiB to give the process some headroom. + + src/xz/hardware.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +commit a79bd30a6fbfe614f47ad77498c46e218dbe2b1d +Author: Lasse Collin +Date: 2021-02-13 23:31:27 +0200 + + CMake: Use interface library for better FindLibLZMA compatibility. + + https://www.mail-archive.com/xz-devel@tukaani.org/msg00446.html + + Thanks to Markus Rickert. + + CMakeLists.txt | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +commit 64d98147616c48f2df37fb46b49350f3d5febd34 +Author: Lasse Collin +Date: 2021-01-30 18:36:04 +0200 + + CMake: Try to improve compatibility with the FindLibLZMA module. + + The naming conflict with FindLibLZMA module gets worse. + Not avoiding it in the first place was stupid. + + Normally find_package(LibLZMA) will use the module and + find_package(liblzma 5.2.5 REQUIRED CONFIG) will use the config + file even with a case insensitive file system. However, if + CMAKE_FIND_PACKAGE_PREFER_CONFIG is TRUE and the file system + is case insensitive, find_package(LibLZMA) will find our liblzma + config file instead of using FindLibLZMA module. + + One big problem with this is that FindLibLZMA uses + LibLZMA::LibLZMA and we use liblzma::liblzma as the target + name. With target names CMake happens to be case sensitive. + To workaround this, this commit adds + + add_library(LibLZMA::LibLZMA ALIAS liblzma::liblzma) + + to the config file. Then both spellings work. + + To make the behavior consistent between case sensitive and + insensitive file systems, the config and related files are + renamed from liblzmaConfig.cmake to liblzma-config.cmake style. + With this style CMake looks for lowercase version of the package + name so find_package(LiBLzmA 5.2.5 REQUIRED CONFIG) will work + to find our config file. + + There are other differences between our config file and + FindLibLZMA so it's still possible that things break for + reasons other than the spelling of the target name. Hopefully + those situations aren't too common. + + When the config file is available, it should always give as good or + better results as FindLibLZMA so this commit doesn't affect the + recommendation to use find_package(liblzma 5.2.5 REQUIRED CONFIG) + which explicitly avoids FindLibLZMA. + + Thanks to Markus Rickert. + + CMakeLists.txt | 21 +++++++++++++-------- + 1 file changed, 13 insertions(+), 8 deletions(-) + +commit 9e2f9e2d0841dd0417125ca3c7a05418541fb230 +Author: Lasse Collin +Date: 2021-01-24 22:32:41 +0200 + + Tests: Add bad-1-lzma2-10.xz and also modify -9.xz. + + tests/files/README | 11 +++++++++-- + tests/files/bad-1-lzma2-10.xz | Bin 0 -> 60 bytes + tests/files/bad-1-lzma2-9.xz | Bin 72 -> 72 bytes + 3 files changed, 9 insertions(+), 2 deletions(-) + +commit 00a4c69bbbf302dd6323fd6970890ee22ddd4f96 +Author: Lasse Collin +Date: 2021-01-24 18:51:51 +0200 + + Tests: Add bad-1-lzma2-9.xz. + + tests/files/README | 4 ++++ + tests/files/bad-1-lzma2-9.xz | Bin 0 -> 72 bytes + 2 files changed, 4 insertions(+) + +commit 1da2269b2e9b952d165ad7bec823de9dfe9a9e9e +Author: Lasse Collin +Date: 2021-01-24 17:02:00 +0200 + + Tests: Add bad-1-check-crc32-2.xz. + + tests/files/README | 7 +++++++ + tests/files/bad-1-check-crc32-2.xz | Bin 0 -> 72 bytes + 2 files changed, 7 insertions(+) + +commit 11ceecb5e2999ff96989915120e63a2e7e130057 +Author: Lasse Collin +Date: 2021-01-11 23:57:11 +0200 + + Scripts: Add zstd support to xzdiff. + + src/scripts/xzdiff.1 | 6 ++++-- + src/scripts/xzdiff.in | 16 +++++++++++----- + 2 files changed, 15 insertions(+), 7 deletions(-) + +commit d655b8c9cb5a4e596c0fa2290fa91e995b734f97 +Author: Lasse Collin +Date: 2021-01-11 23:28:52 +0200 + + Scripts: Fix exit status of xzgrep. + + Omit the -q option from xz, gzip, and bzip2. With xz this shouldn't + matter. With gzip it's important because -q makes gzip replace SIGPIPE + with exit status 2. With bzip2 it's important because with -q bzip2 + is completely silent if input is corrupt while other decompressors + still give an error message. + + Avoiding exit status 2 from gzip is important because bzip2 uses + exit status 2 to indicate corrupt input. Before this commit xzgrep + didn't recognize corrupt .bz2 files because xzgrep was treating + exit status 2 as SIGPIPE for gzip compatibility. + + zstd still needs -q because otherwise it is noisy in normal + operation. + + The code to detect real SIGPIPE didn't check if the exit status + was due to a signal (>= 128) and so could ignore some other exit + status too. + + src/scripts/xzgrep.in | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +commit 09c331b03c801414d547fc25711daf099cef6fb5 +Author: Lasse Collin +Date: 2021-01-11 22:01:51 +0200 + + Scripts: Fix exit status of xzdiff/xzcmp. + + This is a minor fix since this affects only the situation when + the files differ and the exit status is something else than 0. + In such case there could be SIGPIPE from a decompression tool + and that would result in exit status of 2 from xzdiff/xzcmp + while the correct behavior would be to return 1 or whatever + else diff or cmp may have returned. + + This commit omits the -q option from xz/gzip/bzip2/lzop arguments. + I'm not sure why the -q was used in the first place, perhaps it + hides warnings in some situation that I cannot see at the moment. + Hopefully the removal won't introduce a new bug. + + With gzip the -q option was harmful because it made gzip return 2 + instead of >= 128 with SIGPIPE. Ignoring exit status 2 (warning + from gzip) isn't practical because bzip2 uses exit status 2 to + indicate corrupt input file. It's better if SIGPIPE results in + exit status >= 128. + + With bzip2 the removal of -q seems to be good because with -q + it prints nothing if input is corrupt. The other tools aren't + silent in this situation even with -q. On the other hand, if + zstd support is added, it will need -q since otherwise it's + noisy in normal situations. + + Thanks to Étienne Mollier and Sebastian Andrzej Siewior. + + src/scripts/xzdiff.in | 35 +++++++++++++++++++++-------------- + 1 file changed, 21 insertions(+), 14 deletions(-) + +commit b33a345cbab90e18b601d26cc3a0dea083b7d8d9 +Author: Lasse Collin +Date: 2020-12-23 17:15:49 +0200 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit c01e29a933baba737217a3549eaaced7e499edf5 +Author: H.J. Lu +Date: 2020-12-23 06:49:04 -0800 + + liblzma: Enable Intel CET in x86 CRC assembly codes + + When Intel CET is enabled, we need to include in assembly codes + to mark Intel CET support and add _CET_ENDBR to indirect jump targets. + + Tested on Intel Tiger Lake under CET enabled Linux. + + src/liblzma/check/crc32_x86.S | 9 +++++++++ + src/liblzma/check/crc64_x86.S | 9 +++++++++ + 2 files changed, 18 insertions(+) + +commit 0983682f87ae1a9efcbe4e45579d53c9bca16500 +Author: Lasse Collin +Date: 2020-12-16 18:33:29 +0200 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit 880596e4b8731b24f6c19cb737ed7d06fe92dda2 +Author: Lasse Collin +Date: 2020-12-16 18:30:14 +0200 + + Build: Don't build bundles on Apple OSes. + + Thanks to Daniel Packard. + + CMakeLists.txt | 3 +++ + 1 file changed, 3 insertions(+) + +commit 29050c79f1f83bf73714146a44dfbb4d26e27920 +Author: Lasse Collin +Date: 2020-12-05 22:44:03 +0200 + + Update THANKS. + + THANKS | 1 + + 1 file changed, 1 insertion(+) + +commit 94fd72474973978f8369be7c23f77c3b741953a6 +Author: Adam Borowski +Date: 2020-09-25 03:35:18 +0200 + + Scripts: Add zstd support to xzgrep. + + Thanks to Adam Borowski. + + src/scripts/xzgrep.1 | 9 ++++++--- + src/scripts/xzgrep.in | 1 + + 2 files changed, 7 insertions(+), 3 deletions(-) + +commit 13c58ac13e1a84c30de24191931af425286d0550 +Author: Lasse Collin +Date: 2020-11-17 20:51:48 +0200 + + CMake: Fix compatibility with CMake 3.13. + + The syntax "if(DEFINED CACHE{FOO})" requires CMake 3.14. + In some other places the code treats the cache variables + like normal variables already (${FOO} or if(FOO) is used, + not ${CACHE{FOO}). + + Thanks to ygrek for reporting the bug on IRC. + + CMakeLists.txt | 2 +- + cmake/tuklib_cpucores.cmake | 4 ++-- + cmake/tuklib_physmem.cmake | 4 ++-- + 3 files changed, 5 insertions(+), 5 deletions(-) + +commit 8f7d3345a7c31a20a544dc04232715b1bcdc91f7 +Author: Lasse Collin +Date: 2020-11-01 22:56:43 +0200 + + Update THANKS. + + THANKS | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +commit ca7bcdb30f14e7cd66fe357d10046442eee648a0 +Author: Lasse Collin +Date: 2020-11-01 22:34:25 +0200 + + xz: Avoid unneeded \f escapes on the man page. + + I don't want to use \c in macro arguments but groff_man(7) + suggests that \f has better portability. \f would be needed + for the .TP strings for portability reasons anyway. + + Thanks to Bjarni Ingi Gislason. + + src/xz/xz.1 | 31 ++++++++++++++++++++++--------- + 1 file changed, 22 insertions(+), 9 deletions(-) + +commit 3b40a0792ee5ba0dd4d9ab129b4443585bbfdd14 +Author: Lasse Collin +Date: 2020-11-01 19:09:53 +0200 + + xz: Use non-breaking spaces when intentionally using more than one space. + + This silences some style checker warnings. Seems that spaces + in the beginning of a line don't need this treatment. + + Thanks to Bjarni Ingi Gislason. + + src/xz/xz.1 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit d85699c36df5f839b82800a6e63151eff25677eb +Author: Lasse Collin +Date: 2020-11-01 18:49:37 +0200 + + xz: Protect the ellipsis (...) on the man page with \&. + + This does it only when ... appears outside macro calls. + + Thanks to Bjarni Ingi Gislason. + + src/xz/xz.1 | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +commit d996ae66176206de9b4d485b250a88b2d9ded151 +Author: Lasse Collin +Date: 2020-11-01 18:41:21 +0200 + + xz: Avoid the abbreviation "e.g." on the man page. + + A few are simply omitted, most are converted to "for example" + and surrounded with commas. Sounds like that this is better + style, for example, man-pages(7) recommends avoiding such + abbreviations except in parenthesis. + + Thanks to Bjarni Ingi Gislason. + + src/xz/xz.1 | 66 ++++++++++++++++++++++++++++++------------------------------- + 1 file changed, 33 insertions(+), 33 deletions(-) + +commit d16d0d198ade6820ed9d39ba6a7068b55b8bf6dc +Author: Lasse Collin +Date: 2020-07-12 23:10:03 +0300 + + xz man page: Change \- (minus) to \(en (en-dash) for a numeric range. + + Docs of ancient troff/nroff mention \(em (em-dash) but not \(en + and \- was used for both minus and en-dash. I don't know how + portable \(en is nowadays but it can be changed back if someone + complains. At least GNU groff and OpenBSD's mandoc support it. + + Thanks to Bjarni Ingi Gislason for the patch. + + src/xz/xz.1 | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +commit 3acf1adfc768c7d2290e138bcb1109145e4d8160 +Author: Lasse Collin +Date: 2020-07-12 20:46:24 +0300 + + Windows: Fix building of resource files when config.h isn't used. + + Now CMake + Visual Studio works for building liblzma.dll. + + Thanks to Markus Rickert. + + src/common/common_w32res.rc | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +commit adba06e649875b45b5b671f5ab45f694144938d4 +Author: Lasse Collin +Date: 2020-04-06 19:31:50 +0300 + + src/scripts/xzgrep.1: Filenames to xzgrep are optional. + + xzgrep --help was correct already. + + src/scripts/xzgrep.1 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 7be1dcf8585e150bffb57a6ad499a63edd0d3494 +Author: Lasse Collin +Date: 2022-07-12 17:59:41 +0300 + + Translations: Add Portuguese translation. + + Jia Tan made white-space changes and also changed "Language: pt_BR\n" + to pt. The translator wasn't reached so I'm hoping these changes + are OK and will commit it without translator's approval. + + Thanks to Pedro Albuquerque and Jia Tan. + + po/LINGUAS | 1 + + po/pt.po | 1001 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 1002 insertions(+) + +commit 3f94d2a568290bf770a762ee69403955fdb0a228 +Author: Bjarni Ingi Gislason +Date: 2020-03-26 22:17:31 +0000 + + src/script/xzgrep.1: Remove superfluous '.RB' + + Output is from: test-groff -b -e -mandoc -T utf8 -rF0 -t -w w -z + + [ "test-groff" is a developmental version of "groff" ] + + Input file is ./src/scripts/xzgrep.1 + + :20 (macro RB): only 1 argument, but more are expected + :23 (macro RB): only 1 argument, but more are expected + :26 (macro RB): only 1 argument, but more are expected + :29 (macro RB): only 1 argument, but more are expected + :32 (macro RB): only 1 argument, but more are expected + + "abc..." does not mean the same as "abc ...". + + The output from nroff and troff is unchanged except for the space + between "file" and "...". + + Signed-off-by: Bjarni Ingi Gislason + + src/scripts/xzgrep.1 | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +commit 725d9791c90b7cef19a3ab4d7469b567101c2a87 +Author: Bjarni Ingi Gislason +Date: 2020-03-30 21:56:36 +0000 + + xzgrep.1: Delete superfluous '.PP' + + Summary: + + mandoc -T lint xzgrep.1 : + mandoc: xzgrep.1:79:2: WARNING: skipping paragraph macro: PP empty + + There is no change in the output of "nroff" and "troff". + + Signed-off-by: Bjarni Ingi Gislason + + src/scripts/xzgrep.1 | 1 - + 1 file changed, 1 deletion(-) + +commit 55c2555c5d420e9251a97828022e5c8c4d786dac +Author: Bjarni Ingi Gislason +Date: 2020-03-26 21:16:18 +0000 + + src/xz/xz.1: Correct misused two-fonts macros + + Output is from: test-groff -b -e -mandoc -T utf8 -rF0 -t -w w -z + + [ "test-groff" is a developmental version of "groff" ] + + Input file is ./src/xz/xz.1 + + :408 (macro BR): only 1 argument, but more are expected + :1009 (macro BR): only 1 argument, but more are expected + :1743 (macro BR): only 1 argument, but more are expected + :1920 (macro BR): only 1 argument, but more are expected + :2213 (macro BR): only 1 argument, but more are expected + + Output from nroff and troff is unchanged, except for a font change of a + full stop (.). + + Signed-off-by: Bjarni Ingi Gislason + + src/xz/xz.1 | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +commit 55e3a8e0e437279223c108085dccddc60b06036c +Author: Lasse Collin +Date: 2022-07-10 21:16:40 +0300 + + Translations: Add Serbian translation. + + Quite a few white-space changes were made by Jia Tan to make + this look good. Contacting the translator didn't succeed so + I'm committing this without getting translator's approval. + + Thanks to Мирослав Николић (Miroslav Nikolic) and Jia Tan. + + po/LINGUAS | 1 + + po/sr.po | 987 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 988 insertions(+) + +commit 4e04279cbe8c1af25dde42ebf740722deded27a0 +Author: Lasse Collin +Date: 2022-07-04 23:51:36 +0300 + + Translations: Add Swedish translation. + + Thanks to Sebastian Rasmussen and Jia Tan. + + po/LINGUAS | 1 + + po/sv.po | 983 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 984 insertions(+) + +commit 27f55e96b0cdae3fc51b409663b6c017a88ea8bc +Author: Lasse Collin +Date: 2022-07-04 23:40:27 +0300 + + Translations: Add Esperanto translation. + + Thanks to Keith Bowes and Jia Tan. + + po/LINGUAS | 1 + + po/eo.po | 984 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 985 insertions(+) + +commit be31be9e8f12cf6fce4be1ab822660ad411e803b +Author: Lasse Collin +Date: 2022-07-01 00:22:33 +0300 + + Translations: Add Catalan translation. + + Thanks to Jordi Mas and Jia Tan. + + po/LINGUAS | 1 + + po/ca.po | 1076 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 1077 insertions(+) + +commit 30e3ad040f4e7ba1a0e97179d6fd62a1201c863b +Author: Lasse Collin +Date: 2022-06-30 17:47:08 +0300 + + Translations: Add Ukrainian translation. + + Thanks to Yuri Chornoivan and Jia Tan. + + po/LINGUAS | 1 + + po/uk.po | 996 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 997 insertions(+) + +commit f2e2cce49f7b07e8985ba6add4b4ca79b22eddd0 +Author: Lasse Collin +Date: 2022-06-30 17:45:26 +0300 + + Translators: Add Romanian translation. + + Thanks to Remus-Gabriel Chelu and Jia Tan. + + po/LINGUAS | 1 + + po/ro.po | 1016 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 1017 insertions(+) + +commit a12fd5bad2aa922c9b4cf96d16632696f3e0584e +Author: Lasse Collin +Date: 2022-06-29 18:33:32 +0300 + + Translations: Update Brazilian Portuguese translation. + + One msgstr was changed. The diff is long due to changes + in the source code line numbers in the comments. + + Thanks to Rafael Fontenelle. + + po/pt_BR.po | 186 ++++++++++++++++++++++++++++++------------------------------ + 1 file changed, 92 insertions(+), 94 deletions(-) + +commit dd713288d27a0d64426d74effe9187087496333c +Author: Lasse Collin +Date: 2022-06-29 18:04:44 +0300 + + Translations: Add Croatian translation. + + Thanks to Božidar Putanec and Jia Tan. + + po/LINGUAS | 1 + + po/hr.po | 987 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 988 insertions(+) + +commit 016980f26974623a4bcd889b6ba5a0caa4b2f57f +Author: Lasse Collin +Date: 2022-06-29 17:58:48 +0300 + + Translations: Add Spanish translation. + + Thanks to Cristian Othón Martínez Vera and Jia Tan. + + po/LINGUAS | 1 + + po/es.po | 984 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 985 insertions(+) + +commit 4fe9578c3aab21e6ceb8f0ea470bc87821f5907f +Author: Lasse Collin +Date: 2022-06-29 17:49:43 +0300 + + Translations: Add Korean translation. + + Thanks to Seong-ho Cho and Jia Tan. + + po/LINGUAS | 1 + + po/ko.po | 972 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 973 insertions(+) + +commit cf1ec5518e6e3d89f57685ac83b81ab228e48bcd +Author: Lasse Collin +Date: 2020-03-23 18:09:40 +0200 + + v5.2-specific typo fix from fossies.org. + + README | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 968bbfea09447a4f52e856bb97301aca80e40dcb +Author: Lasse Collin +Date: 2020-03-23 18:07:50 +0200 + + Typo fixes from fossies.org. + + https://fossies.org/linux/misc/xz-5.2.5.tar.xz/codespell.html + + Makefile.am | 2 +- + doc/examples/01_compress_easy.c | 2 +- + src/liblzma/api/lzma/base.h | 2 +- + src/liblzma/check/crc32_x86.S | 2 +- + src/liblzma/common/index.c | 2 +- + src/xz/xz.1 | 4 ++-- + 6 files changed, 7 insertions(+), 7 deletions(-) + commit 2327a461e1afce862c22269b80d3517801103c1b Author: Lasse Collin Date: 2020-03-17 16:27:42 +0200 Bump version and soname for 5.2.5. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 3be82d2f7dc882258caf0f0a69214e5916b2bdda Author: Lasse Collin Date: 2020-03-17 16:26:04 +0200 Update NEWS for 5.2.5. NEWS | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) commit ab3e57539c7337f0653b13b75dbc5d03ade9700e Author: Lasse Collin Date: 2020-03-16 21:57:21 +0200 Translations: Rebuild cs.po to avoid incorrect fuzzy strings. "make dist" updates the .po files and the fuzzy strings would result in multiple very wrong translations. po/cs.po | 592 ++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 322 insertions(+), 270 deletions(-) commit 3a6f38309dc5d44d8a63ebb337b6b2028561c93e Author: Lasse Collin Date: 2020-03-16 20:01:37 +0200 README: Update outdated sections. README | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) commit 9cc0901798217e258e91c13cf6fda7ad42ba108c Author: Lasse Collin Date: 2020-03-16 19:46:27 +0200 README: Mention that translatable strings will change after 5.2.x. README | 74 +++--------------------------------------------------------------- 1 file changed, 3 insertions(+), 71 deletions(-) commit cc163574249f6a4a66f3dc09d6fe5a71bee24fab Author: Lasse Collin Date: 2020-03-16 19:39:45 +0200 README: Mention that man pages can be translated. README | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) commit ca261994edc3f2d03d5589c037171c63471ee9dc Author: Lasse Collin Date: 2020-03-16 17:30:39 +0200 Translations: Add partial Danish translation. I made a few minor white space changes without getting them approved by the Danish translation team. po/LINGUAS | 1 + po/da.po | 896 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 897 insertions(+) commit 51cd5d051fc730d61411dee292e863582784e189 Author: Lasse Collin Date: 2020-03-16 16:43:29 +0200 Update INSTALL.generic from Automake 1.16.1. INSTALL.generic | 321 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 162 insertions(+), 159 deletions(-) commit 69d694e5f1beae2bbfa3b6c348ec0ec5f14b5cd0 Author: Lasse Collin Date: 2020-03-15 15:27:22 +0200 Update INSTALL for Windows and DOS and add preliminary info for z/OS. INSTALL | 51 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 10 deletions(-) commit 2c3b1bb80a3ca7e09728fe4d7a1d8648a5cb9bca Author: Lasse Collin Date: 2020-03-15 15:26:20 +0200 Build: Update m4/ax_pthread.m4 from Autoconf Archive (again). m4/ax_pthread.m4 | 219 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 117 insertions(+), 102 deletions(-) commit 74a5af180a6a6c4b8c90cefb37ee900d3fea7dc6 Author: Lasse Collin Date: 2020-03-11 21:15:35 +0200 xz: Never use thousand separators in DJGPP builds. DJGPP 2.05 added support for thousands separators but it's broken at least under WinXP with Finnish locale that uses a non-breaking space as the thousands separator. Workaround by disabling thousands separators for DJGPP builds. src/xz/util.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) commit ceba0d25e826bcdbf64bb4cb03385a2a66f8cbcb Author: Lasse Collin Date: 2020-03-11 19:38:08 +0200 DOS: Update dos/Makefile for DJGPP 2.05. It doesn't need -fgnu89-inline like 2.04beta did. dos/Makefile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) commit 29e5bd71612253281fb22bbaa0a566990a74dcc3 Author: Lasse Collin Date: 2020-03-11 19:36:07 +0200 DOS: Update instructions in dos/INSTALL.txt. dos/INSTALL.txt | 59 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 30 deletions(-) commit 00a037ee9c8ee5a03cf9744e05570ae93d56b875 Author: Lasse Collin Date: 2020-03-11 17:58:51 +0200 DOS: Update config.h. The added defines assume GCC >= 4.8. dos/config.h | 8 ++++++++ 1 file changed, 8 insertions(+) commit 4ec2feaefa310b4249eb41893caf526e5c51ee39 Author: Lasse Collin Date: 2020-03-11 22:37:54 +0200 Translations: Add hu, zh_CN, and zh_TW. I made a few white space changes to these without getting them approved by the translation teams. (I tried to contact the hu and zh_TW teams but didn't succeed. I didn't contact the zh_CN team.) po/LINGUAS | 3 + po/hu.po | 985 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ po/zh_CN.po | 963 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ po/zh_TW.po | 956 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 2907 insertions(+) commit b6ed09729ae408be4533a0ddbc7df3d6f566846a Author: Lasse Collin Date: 2020-03-11 14:33:30 +0200 Translations: Update vi.po to match the file from the TP. The translated strings haven't been updated but word wrapping is different. po/vi.po | 407 ++++++++++++++++++++++++++++----------------------------------- 1 file changed, 179 insertions(+), 228 deletions(-) commit 7c85e8953ced204c858101872a15183e4639e9fb Author: Lasse Collin Date: 2020-03-11 14:18:03 +0200 Translations: Add fi and pt_BR, and update de, fr, it, and pl. The German translation isn't identical to the file in the Translation Project but the changes (white space changes only) were approved by the translator Mario Blättermann. po/LINGUAS | 2 + po/de.po | 476 ++++++++++++++-------------- po/fi.po | 974 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ po/fr.po | 272 ++++++++-------- po/it.po | 479 ++++++++++++---------------- po/pl.po | 239 +++++++------- po/pt_BR.po | 1001 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 2697 insertions(+), 746 deletions(-) commit 7da3ebc67fb5414034685ec16c7a29dad03dfa9b Author: Lasse Collin Date: 2020-02-25 21:35:14 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 1acc48794364606c9091cae6fa56db75a1325114 Author: Lasse Collin Date: 2020-03-11 13:05:29 +0200 Build: Add very limited experimental CMake support. This version matches CMake files in the master branch (commit 265daa873c0d871f5f23f9b56e133a6f20045a0a) except that this omits two source files that aren't in v5.2 and in the beginning of CMakeLists.txt the first paragraph in the comment is slightly different to point out possible issues in building shared liblzma. CMakeLists.txt | 659 ++++++++++++++++++++++++++++++++++++++++++++ cmake/tuklib_common.cmake | 49 ++++ cmake/tuklib_cpucores.cmake | 175 ++++++++++++ cmake/tuklib_integer.cmake | 102 +++++++ cmake/tuklib_mbstr.cmake | 20 ++ cmake/tuklib_physmem.cmake | 150 ++++++++++ cmake/tuklib_progname.cmake | 19 ++ 7 files changed, 1174 insertions(+) commit 9acc6abea1552803c74c1486fbb10af119550772 Author: Lasse Collin Date: 2020-02-27 20:24:27 +0200 Build: Add support for --no-po4a option to autogen.sh. Normally, if po4a isn't available, autogen.sh will return with non-zero exit status. The option --no-po4a can be useful when one knows that po4a isn't available but wants autogen.sh to still return with zero exit status. autogen.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) commit c8853b31545db7bd0551be85949624b1261efd47 Author: Lasse Collin Date: 2020-02-24 23:37:07 +0200 Update m4/.gitignore. m4/.gitignore | 1 + 1 file changed, 1 insertion(+) commit 901eb4a8c992354c3ea482f5bad60a1f8ad6fcc8 Author: Lasse Collin Date: 2020-02-24 23:01:00 +0200 liblzma: Remove unneeded from fastpos_tablegen.c. This file only generates fastpos_table.c. It isn't built as a part of liblzma. src/liblzma/lzma/fastpos_tablegen.c | 1 - 1 file changed, 1 deletion(-) commit ac35c9585fb734b7a19785d490c152e0b8cd4663 Author: Lasse Collin Date: 2020-02-22 14:15:07 +0200 Use defined(__GNUC__) before __GNUC__ in preprocessor lines. This should silence the equivalent of -Wundef in compilers that don't define __GNUC__. src/common/sysdefs.h | 3 ++- src/liblzma/api/lzma.h | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) commit fb9cada7cfade1156d6277717280e05b5cd342d6 Author: Lasse Collin Date: 2020-02-21 17:40:02 +0200 liblzma: Add more uses of lzma_memcmplen() to the normal mode of LZMA. This gives a tiny encoder speed improvement. This could have been done in 2014 after the commit 544aaa3d13554e8640f9caf7db717a96360ec0f6 but it was forgotten. src/liblzma/lzma/lzma_encoder_optimum_normal.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) commit 6117955af0b9cef5acde7859e86f773692b5f43c Author: Lasse Collin Date: 2020-02-21 17:01:15 +0200 Build: Add visibility.m4 from gnulib. Appears that this file used to get included as a side effect of gettext. After the change to gettext version requirements this file no longer got copied to the package and so the build was broken. m4/.gitignore | 1 - m4/visibility.m4 | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) commit c2cc64d78c098834231f9cfd7d852c9cd8950d74 Author: Lasse Collin Date: 2020-02-21 16:10:44 +0200 xz: Silence a warning when sig_atomic_t is long int. It can be true at least on z/OS. src/xz/signals.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit b6314aa275b35c714e0a191d0b2e9b6106129ea9 Author: Lasse Collin Date: 2020-02-21 15:59:26 +0200 xz: Avoid unneeded access of a volatile variable. src/xz/signals.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit f772a1572f723e5dc7d2d32e1d4287ac7a0da55e Author: Lasse Collin Date: 2020-02-21 01:24:18 +0200 tuklib_integer.m4: Optimize the check order. The __builtin byteswapping is the preferred one so check for it first. m4/tuklib_integer.m4 | 56 +++++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 27 deletions(-) commit 641042e63f665f3231c2fd1241fd3dddda3fb313 Author: Lasse Collin Date: 2020-02-20 18:54:04 +0200 tuklib_exit: Add missing header. strerror() needs which happened to be included via tuklib_common.h -> tuklib_config.h -> sysdefs.h if HAVE_CONFIG_H was defined. This wasn't tested without config.h before so it had worked fine. src/common/tuklib_exit.c | 1 + 1 file changed, 1 insertion(+) commit dbd55a69e530fec9ae866aaf6c3ccc0b4daf1f1f Author: Lasse Collin Date: 2020-02-16 11:18:28 +0200 sysdefs.h: Omit the conditionals around string.h and limits.h. string.h is used unconditionally elsewhere in the project and configure has always stopped if limits.h is missing, so these headers must have been always available even on the weirdest systems. src/common/sysdefs.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) commit 9294909861e6d22b32418467e0e988f953a82264 Author: Lasse Collin Date: 2020-02-15 15:07:11 +0200 Build: Bump Autoconf and Libtool version requirements. There is no specific reason for this other than blocking the most ancient versions. These are still old: Autoconf 2.69 (2012) Automake 1.12 (2012) gettext 0.19.6 (2015) Libtool 2.4 (2010) configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit bd09081bbdf552f730030d2fd0e5e39ccb3936af Author: Lasse Collin Date: 2020-02-15 03:08:32 +0200 Build: Use AM_GNU_GETTEXT_REQUIRE_VERSION and require 0.19.6. This bumps the version requirement from 0.19 (from 2014) to 0.19.6 (2015). Using only the old AM_GNU_GETTEXT_VERSION results in old gettext infrastructure being placed in the package. By using both macros we get the latest gettext files while the other programs in the Autotools family can still see the old macro. configure.ac | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit 1e5e08d86534aec7ca957982c7f6e90203c19e9f Author: Lasse Collin Date: 2020-02-14 20:42:06 +0200 Translations: Add German translation of the man pages. Thanks to Mario Blättermann. po4a/de.po | 5532 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ po4a/po4a.conf | 2 +- 2 files changed, 5533 insertions(+), 1 deletion(-) commit 4b1447809ffbc0d77c0ad456bd6b3afcf0b8623e Author: Lasse Collin Date: 2020-02-07 15:32:21 +0200 Build: Add support for translated man pages using po4a. The dependency on po4a is optional. It's never required to install the translated man pages when xz is built from a release tarball. If po4a is missing when building from xz.git, the translated man pages won't be generated but otherwise the build will work normally. The translations are only updated automatically by autogen.sh and by "make mydist". This makes it easy to keep po4a as an optional dependency and ensures that I won't forget to put updated translations to a release tarball. The translated man pages aren't installed if --disable-nls is used. The installation of translated man pages abuses Automake internals by calling "install-man" with redefined dist_man_MANS and man_MANS. This makes the hairy script code slightly less hairy. If it breaks some day, this code needs to be fixed; don't blame Automake developers. Also, this adds more quotes to the existing shell script code in the Makefile.am "-hook"s. Makefile.am | 4 ++++ autogen.sh | 8 ++++--- po4a/.gitignore | 2 ++ po4a/po4a.conf | 14 +++++++++++ po4a/update-po | 45 ++++++++++++++++++++++++++++++++++ src/scripts/Makefile.am | 64 +++++++++++++++++++++++++++++++++++++------------ src/xz/Makefile.am | 50 +++++++++++++++++++++++++++----------- src/xzdec/Makefile.am | 55 ++++++++++++++++++++++++++++++++---------- 8 files changed, 197 insertions(+), 45 deletions(-) commit 882fcfdcd86525cc5c6f6d0bf0230d0089206d13 Author: Lasse Collin Date: 2020-02-06 00:04:42 +0200 Update THANKS (sync with the master branch). THANKS | 3 +++ 1 file changed, 3 insertions(+) commit 134bb7765815d5f265eb0bc9e6ebacd9ae4a52bc Author: Lasse Collin Date: 2020-02-05 22:35:06 +0200 Update tests/.gitignore. .gitignore | 4 ++++ 1 file changed, 4 insertions(+) commit 6912472fafb656be8f4c5b4ac9ea28fea3065de4 Author: Lasse Collin Date: 2020-02-05 22:28:51 +0200 Update m4/.gitignore. m4/.gitignore | 1 + 1 file changed, 1 insertion(+) commit 68c60735bbb6e51d4205ba8a9fde307bcfb22f8c Author: Lasse Collin Date: 2020-02-05 20:47:38 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit e1beaa74bc7cb5a409d59b55870e01ae7784ce3a Author: Lasse Collin Date: 2020-02-05 20:33:50 +0200 xz: Comment out annoying sandboxing messages. src/xz/file_io.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) commit 8238192652290df78bd728b20e3f6542d1a2819e Author: Lasse Collin Date: 2020-02-05 19:33:37 +0200 Build: Workaround a POSIX shell detection problem on Solaris. I don't know if the problem is in gnulib's gl_POSIX_SHELL macro or if xzgrep does something that isn't in POSIX. The workaround adds a special case for Solaris: if /usr/xpg4/bin/sh exists and gl_cv_posix_shell wasn't overriden on the configure command line, use that shell for xzgrep and other scripts. That shell is known to work and exists on most Solaris systems. configure.ac | 10 ++++++++++ 1 file changed, 10 insertions(+) commit 93a1f61e892e145607dd938e3b30098af19a1672 Author: Lasse Collin Date: 2020-02-03 22:03:50 +0200 Build: Update m4/ax_pthread.m4 from Autoconf Archive. m4/ax_pthread.m4 | 398 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 279 insertions(+), 119 deletions(-) commit d0daa21792ff861e5423bbd82aaa6c8ba9fa0462 Author: Lasse Collin Date: 2020-02-01 19:56:18 +0200 xz: Limit --memlimit-compress to at most 4020 MiB for 32-bit xz. See the code comment for reasoning. It's far from perfect but hopefully good enough for certain cases while hopefully doing nothing bad in other situations. At presets -5 ... -9, 4020 MiB vs. 4096 MiB makes no difference on how xz scales down the number of threads. The limit has to be a few MiB below 4096 MiB because otherwise things like "xz --lzma2=dict=500MiB" won't scale down the dict size enough and xz cannot allocate enough memory. With "ulimit -v $((4096 * 1024))" on x86-64, the limit in xz had to be no more than 4085 MiB. Some safety margin is good though. This is hack but it should be useful when running 32-bit xz on a 64-bit kernel that gives full 4 GiB address space to xz. Hopefully this is enough to solve this: https://bugzilla.redhat.com/show_bug.cgi?id=1196786 FreeBSD has a patch that limits the result in tuklib_physmem() to SIZE_MAX on 32-bit systems. While I think it's not the way to do it, the results on --memlimit-compress have been good. This commit should achieve practically identical results for compression while leaving decompression and tuklib_physmem() and thus lzma_physmem() unaffected. src/xz/hardware.c | 32 +++++++++++++++++++++++++++++++- src/xz/xz.1 | 21 ++++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) commit 4433c2dc5727ee6aef570e001a5a024e0d94e609 Author: Lasse Collin Date: 2020-01-26 20:53:25 +0200 xz: Set the --flush-timeout deadline when the first input byte arrives. xz --flush-timeout=2000, old version: 1. xz is started. The next flush will happen after two seconds. 2. No input for one second. 3. A burst of a few kilobytes of input. 4. No input for one second. 5. Two seconds have passed and flushing starts. The first second counted towards the flush-timeout even though there was no pending data. This can cause flushing to occur more often than needed. xz --flush-timeout=2000, after this commit: 1. xz is started. 2. No input for one second. 3. A burst of a few kilobytes of input. The next flush will happen after two seconds counted from the time when the first bytes of the burst were read. 4. No input for one second. 5. No input for another second. 6. Two seconds have passed and flushing starts. src/xz/coder.c | 6 +----- src/xz/file_io.c | 6 +++++- src/xz/mytime.c | 1 - 3 files changed, 6 insertions(+), 7 deletions(-) commit acc0ef3ac80f18e349c6d0252177707105c0a29c Author: Lasse Collin Date: 2020-01-26 20:19:19 +0200 xz: Move flush_needed from mytime.h to file_pair struct in file_io.h. src/xz/coder.c | 3 ++- src/xz/file_io.c | 3 ++- src/xz/file_io.h | 3 +++ src/xz/mytime.c | 3 --- src/xz/mytime.h | 4 ---- 5 files changed, 7 insertions(+), 9 deletions(-) commit 4afe69d30b66812682a2016ee18441958019cbb2 Author: Lasse Collin Date: 2020-01-26 14:49:22 +0200 xz: coder.c: Make writing output a separate function. The same code sequence repeats so it's nicer as a separate function. Note that in one case there was no test for opt_mode != MODE_TEST, but that was only because that condition would always be true, so this commit doesn't change the behavior there. src/xz/coder.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) commit ec26f3ace5f9b260ca91508030f07465ae2f9f78 Author: Lasse Collin Date: 2020-01-26 14:13:42 +0200 xz: Fix semi-busy-waiting in xz --flush-timeout. When input blocked, xz --flush-timeout=1 would wake up every millisecond and initiate flushing which would have nothing to flush and thus would just waste CPU time. The fix disables the timeout when no input has been seen since the previous flush. src/xz/coder.c | 4 ++++ src/xz/file_io.c | 15 +++++++++++---- src/xz/file_io.h | 4 ++++ 3 files changed, 19 insertions(+), 4 deletions(-) commit 38915703241e69a64f133ff9a02ec9100c6019c6 Author: Lasse Collin Date: 2020-01-26 13:47:31 +0200 xz: Refactor io_read() a bit. src/xz/file_io.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) commit f6d24245349cecfae6ec0d2366fa80716c9f6d37 Author: Lasse Collin Date: 2020-01-26 13:37:08 +0200 xz: Update a comment in file_io.h. src/xz/file_io.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) commit 15b55d5c63d27f81776edb1abc05872a751fc674 Author: Lasse Collin Date: 2020-01-26 13:27:51 +0200 xz: Move the setting of flush_needed in file_io.c to a nicer location. src/xz/file_io.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) commit 609c7067859146ffc62ac655f6ba53599c891801 Author: Lasse Collin Date: 2020-02-05 19:56:09 +0200 xz: Enable Capsicum sandboxing by default if available. It has been enabled in FreeBSD for a while and reported to work fine. Thanks to Xin Li. INSTALL | 6 ------ configure.ac | 8 ++++---- 2 files changed, 4 insertions(+), 10 deletions(-) commit 00517d125cc26ecece0eebb84c1c3975cd19bee0 Author: Lasse Collin Date: 2019-12-31 22:41:45 +0200 Rename unaligned_read32ne to read32ne, and similarly for the others. src/common/tuklib_integer.h | 64 +++++++++++++++---------------- src/liblzma/common/alone_encoder.c | 2 +- src/liblzma/common/block_header_decoder.c | 2 +- src/liblzma/common/block_header_encoder.c | 2 +- src/liblzma/common/memcmplen.h | 9 ++--- src/liblzma/common/stream_flags_decoder.c | 6 +-- src/liblzma/common/stream_flags_encoder.c | 8 ++-- src/liblzma/lz/lz_encoder_hash.h | 2 +- src/liblzma/lzma/lzma_decoder.c | 2 +- src/liblzma/lzma/lzma_encoder.c | 2 +- src/liblzma/lzma/lzma_encoder_private.h | 3 +- src/liblzma/simple/simple_decoder.c | 2 +- src/liblzma/simple/simple_encoder.c | 2 +- tests/test_block_header.c | 4 +- tests/test_stream_flags.c | 6 +-- 15 files changed, 54 insertions(+), 62 deletions(-) commit 52d89d8443c4a31a69c0701062f2c7711d82bbed Author: Lasse Collin Date: 2019-12-31 00:29:48 +0200 Rename read32ne to aligned_read32ne, and similarly for the others. Using the aligned methods requires more care to ensure that the address really is aligned, so it's nicer if the aligned methods are prefixed. The next commit will remove the unaligned_ prefix from the unaligned methods which in liblzma are used in more places than the aligned ones. src/common/tuklib_integer.h | 56 +++++++++++++++++++++--------------------- src/liblzma/check/crc32_fast.c | 4 +-- src/liblzma/check/crc64_fast.c | 4 +-- 3 files changed, 32 insertions(+), 32 deletions(-) commit 850620468b57d49f16093e5870d1050886fcb37a Author: Lasse Collin Date: 2019-12-31 00:18:24 +0200 Revise tuklib_integer.h and .m4. Add a configure option --enable-unsafe-type-punning to get the old non-conforming memory access methods. It can be useful with old compilers or in some other less typical situations but shouldn't normally be used. Omit the packed struct trick for unaligned access. While it's best in some cases, this is simpler. If the memcpy trick doesn't work, one can request unsafe type punning from configure. Because CRC32/CRC64 code needs fast aligned reads, if no very safe way to do it is found, type punning is used as a fallback. This sucks but since it currently works in practice, it seems to be the least bad option. It's never needed with GCC >= 4.7 or Clang >= 3.6 since these support __builtin_assume_aligned and thus fast aligned access can be done with the memcpy trick. Other things: - Support GCC/Clang __builtin_bswapXX - Cleaner bswap fallback macros - Minor cleanups m4/tuklib_integer.m4 | 43 ++++ src/common/tuklib_integer.h | 488 ++++++++++++++++++++++++-------------------- 2 files changed, 314 insertions(+), 217 deletions(-) commit a45badf0342666462cc6a7107a071418570ab773 Author: Lasse Collin Date: 2019-12-29 22:51:58 +0200 Tests: Hopefully fix test_check.c to work on EBCDIC systems. Thanks to Daniel Richard G. tests/test_check.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) commit c9a8071e6690a8db8a485c075920df254e7c70ea Author: Lasse Collin Date: 2019-09-24 23:02:40 +0300 Scripts: Put /usr/xpg4/bin to the beginning of PATH on Solaris. This adds a configure option --enable-path-for-scripts=PREFIX which defaults to empty except on Solaris it is /usr/xpg4/bin to make POSIX grep and others available. The Solaris case had been documented in INSTALL with a manual fix but it's better to do this automatically since it is needed on most Solaris systems anyway. Thanks to Daniel Richard G. INSTALL | 43 +++++++++++++++++++++++++++++++++++-------- configure.ac | 26 ++++++++++++++++++++++++++ src/scripts/xzdiff.in | 1 + src/scripts/xzgrep.in | 1 + src/scripts/xzless.in | 1 + src/scripts/xzmore.in | 1 + 6 files changed, 65 insertions(+), 8 deletions(-) commit aba140e2df3ff63ad124ae997de16d517b98ca50 Author: Lasse Collin Date: 2019-07-12 18:57:43 +0300 Fix comment typos in tuklib_mbstr* files. src/common/tuklib_mbstr.h | 2 +- src/common/tuklib_mbstr_fw.c | 2 +- src/common/tuklib_mbstr_width.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) commit 710f5bd769a5d2bd8684256c2727d15350ee2ab8 Author: Lasse Collin Date: 2019-07-12 18:30:46 +0300 Add missing include to tuklib_mbstr_width.c. It didn't matter in XZ Utils because sysdefs.h includes string.h anyway. src/common/tuklib_mbstr_width.c | 1 + 1 file changed, 1 insertion(+) commit 0e491aa8cd72e0100cd15c1b9469cd57fae500b0 Author: Lasse Collin Date: 2019-06-25 23:15:21 +0300 liblzma: Fix a buggy comment. src/liblzma/lz/lz_encoder_mf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit bfc245569f340a75bd71ad32a6beba786712683b Author: Lasse Collin Date: 2019-06-25 00:16:06 +0300 configure.ac: Fix a typo in a comment. configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit f18eee9d15a22c8449ef395a05f0eb637c4ad253 Author: Lasse Collin Date: 2019-06-25 00:08:13 +0300 Tests: Silence warnings from clang -Wassign-enum. Also changed 999 to 99 so it fits even if lzma_check happened to be 8 bits wide. tests/test_block_header.c | 3 ++- tests/test_stream_flags.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) commit 25f74554723e8deabc66fed1abf0ec27a4ed19d5 Author: Lasse Collin Date: 2019-06-24 23:52:17 +0300 liblzma: Add a comment. src/liblzma/common/stream_encoder_mt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 44eb961f2a51d02420d017bc5ff470360663650c Author: Lasse Collin Date: 2019-06-24 23:45:21 +0300 liblzma: Silence clang -Wmissing-variable-declarations. src/liblzma/check/crc32_table.c | 3 +++ src/liblzma/check/crc64_table.c | 3 +++ 2 files changed, 6 insertions(+) commit 267afcd9955e668c1532b069230c21c348eb4f82 Author: Lasse Collin Date: 2019-06-24 22:57:43 +0300 xz: Silence a warning from clang -Wsign-conversion in main.c. src/xz/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 0e3c4002f809311ecef239b05e556d9c462b5703 Author: Lasse Collin Date: 2019-06-24 22:47:39 +0300 liblzma: Remove incorrect uses of lzma_attribute((__unused__)). Caught by clang -Wused-but-marked-unused. src/liblzma/common/alone_decoder.c | 3 +-- src/liblzma/common/alone_encoder.c | 3 +-- src/liblzma/lz/lz_decoder.c | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) commit cb708e8fa3405ec13a0ebfebbbf2793f927deab1 Author: Lasse Collin Date: 2019-06-24 20:53:55 +0300 Tests: Silence a warning from -Wsign-conversion. tests/create_compress_files.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) commit c8cace3d6e965c0fb537591372bf71b9357dd76c Author: Lasse Collin Date: 2019-06-24 20:45:49 +0300 xz: Fix an integer overflow with 32-bit off_t. Or any off_t which isn't very big (like signed 64 bit integer that most system have). A small off_t could overflow if the file being decompressed had long enough run of zero bytes, which would result in corrupt output. src/xz/file_io.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) commit 65a42741e290fbcd85dfc5db8a62c4bce5f7712c Author: Lasse Collin Date: 2019-06-24 00:57:23 +0300 Tests: Remove a duplicate branch from tests/tests.h. The duplication was introduced about eleven years ago and should have been cleaned up back then already. This was caught by -Wduplicated-branches. tests/tests.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) commit 5c4fb60e8df026e933afab0cfe0a8b55be20036c Author: Lasse Collin Date: 2019-06-23 23:22:45 +0300 tuklib_mbstr_width: Fix a warning from -Wsign-conversion. src/common/tuklib_mbstr_width.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 37df03ce52ce53710e1513387648763f8a744154 Author: Lasse Collin Date: 2019-06-23 23:19:34 +0300 xz: Fix some of the warnings from -Wsign-conversion. src/xz/args.c | 4 ++-- src/xz/coder.c | 4 ++-- src/xz/file_io.c | 5 +++-- src/xz/message.c | 4 ++-- src/xz/mytime.c | 4 ++-- src/xz/options.c | 2 +- src/xz/util.c | 4 ++-- 7 files changed, 14 insertions(+), 13 deletions(-) commit 7c65ae0f5f2e2431f88621e8fe6d1dc7907e30c1 Author: Lasse Collin Date: 2019-06-23 22:27:45 +0300 tuklib_cpucores: Silence warnings from -Wsign-conversion. src/common/tuklib_cpucores.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) commit a502dd1d000b598406637d452f535f4bbd43e2a4 Author: Lasse Collin Date: 2019-06-23 21:40:47 +0300 xzdec: Fix warnings from -Wsign-conversion. src/xzdec/xzdec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit a45d1a5374ceb22e23255b0b595b9e641e9860af Author: Lasse Collin Date: 2019-06-23 21:38:56 +0300 liblzma: Fix warnings from -Wsign-conversion. Also, more parentheses were added to the literal_subcoder macro in lzma_comon.h (better style but no functional change in the current usage). src/liblzma/common/block_header_decoder.c | 2 +- src/liblzma/delta/delta_decoder.c | 2 +- src/liblzma/lzma/fastpos.h | 2 +- src/liblzma/lzma/lzma2_decoder.c | 8 ++++---- src/liblzma/lzma/lzma_common.h | 3 ++- src/liblzma/lzma/lzma_decoder.c | 16 ++++++++-------- src/liblzma/simple/arm.c | 6 +++--- src/liblzma/simple/armthumb.c | 8 ++++---- src/liblzma/simple/ia64.c | 2 +- src/liblzma/simple/powerpc.c | 9 +++++---- src/liblzma/simple/x86.c | 2 +- 11 files changed, 31 insertions(+), 29 deletions(-) commit 4ff87ddf80ed7cb233444cddd86ab1940b5b55ec Author: Lasse Collin Date: 2019-06-23 19:33:55 +0300 tuklib_integer: Silence warnings from -Wsign-conversion. src/common/tuklib_integer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit ed1a9d33984a3a37ae9a775a46859850d98ea4d0 Author: Lasse Collin Date: 2019-06-20 19:40:30 +0300 tuklib_integer: Fix usage of conv macros. Use a temporary variable instead of e.g. conv32le(unaligned_read32ne(buf)) because the macro can evaluate its argument multiple times. src/common/tuklib_integer.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) commit 612c88dfc08e2c572623954ecfde541d21c84882 Author: Lasse Collin Date: 2019-06-03 20:44:19 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 85da31d8b882b8b9671ab3e3d74d88bd945cd0bb Author: Lasse Collin Date: 2019-06-03 20:41:54 +0300 liblzma: Fix comments. Thanks to Bruce Stark. src/liblzma/common/alone_encoder.c | 4 ++-- src/liblzma/common/block_util.c | 2 +- src/liblzma/common/common.c | 2 +- src/liblzma/common/filter_common.h | 2 +- src/liblzma/common/filter_decoder.h | 2 +- src/liblzma/common/filter_flags_encoder.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) commit 6a73a7889587aa394e236c7e9e4f870b44851036 Author: Lasse Collin Date: 2019-06-02 00:50:59 +0300 liblzma: Fix one more unaligned read to use unaligned_read16ne(). src/liblzma/lz/lz_encoder_hash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit ce59b34ec9ac344d62a57cad5f94f695f42cdaee Author: Lasse Collin Date: 2019-06-01 21:41:55 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 94aa3fb568fe41dd4925a961966ed5cf8213bd1f Author: Lasse Collin Date: 2019-06-01 21:36:13 +0300 liblzma: memcmplen: Use ctz32() from tuklib_integer.h. The same compiler-specific #ifdefs are already in tuklib_integer.h src/liblzma/common/memcmplen.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) commit 412791486dfb430219d8e30bcbebbfc57a99484a Author: Lasse Collin Date: 2019-06-01 21:30:03 +0300 tuklib_integer: Cleanup MSVC-specific code. src/common/tuklib_integer.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) commit efbf6e5f0932e6c1a4250f91ee99059f449f2470 Author: Lasse Collin Date: 2019-06-01 19:01:21 +0300 liblzma: Use unaligned_readXXne functions instead of type punning. Now gcc -fsanitize=undefined should be clean. Thanks to Jeffrey Walton. src/liblzma/common/memcmplen.h | 12 ++++++------ src/liblzma/lzma/lzma_encoder_private.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) commit 29afef03486d461c23f57150ac5436684bff7811 Author: Lasse Collin Date: 2019-06-01 18:41:16 +0300 tuklib_integer: Improve unaligned memory access. Now memcpy() or GNU C packed structs for unaligned access instead of type punning. See the comment in this commit for details. Avoiding type punning with unaligned access is needed to silence gcc -fsanitize=undefined. New functions: unaliged_readXXne and unaligned_writeXXne where XX is 16, 32, or 64. src/common/tuklib_integer.h | 180 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 168 insertions(+), 12 deletions(-) commit 596ed3de4485a4b1d83b5fe506ae9d0a172139b4 Author: Lasse Collin Date: 2019-05-13 20:05:17 +0300 liblzma: Avoid memcpy(NULL, foo, 0) because it is undefined behavior. I should have always known this but I didn't. Here is an example as a reminder to myself: int mycopy(void *dest, void *src, size_t n) { memcpy(dest, src, n); return dest == NULL; } In the example, a compiler may assume that dest != NULL because passing NULL to memcpy() would be undefined behavior. Testing with GCC 8.2.1, mycopy(NULL, NULL, 0) returns 1 with -O0 and -O1. With -O2 the return value is 0 because the compiler infers that dest cannot be NULL because it was already used with memcpy() and thus the test for NULL gets optimized out. In liblzma, if a null-pointer was passed to memcpy(), there were no checks for NULL *after* the memcpy() call, so I cautiously suspect that it shouldn't have caused bad behavior in practice, but it's hard to be sure, and the problematic cases had to be fixed anyway. Thanks to Jeffrey Walton. src/liblzma/common/common.c | 6 +++++- src/liblzma/lz/lz_decoder.c | 12 +++++++++--- src/liblzma/simple/simple_coder.c | 10 +++++++++- 3 files changed, 23 insertions(+), 5 deletions(-) commit b4b83555c576e1d845a2b98a193b23c021437804 Author: Lasse Collin Date: 2019-05-11 20:56:08 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 8d4906262b45557ed164cd74adb270e6ef7f6f03 Author: Lasse Collin Date: 2019-05-11 20:54:12 +0300 xz: Update xz man page date. src/xz/xz.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 0d318402f8a022f707622c72f8f1894ea476cf89 Author: Antoine Cœur Date: 2019-05-08 13:30:57 +0800 spelling Doxyfile.in | 2 +- NEWS | 2 +- src/liblzma/api/lzma/block.h | 2 +- src/liblzma/api/lzma/hardware.h | 2 +- src/liblzma/api/lzma/lzma12.h | 2 +- src/liblzma/api/lzma/vli.h | 2 +- src/liblzma/common/hardware_physmem.c | 2 +- src/liblzma/common/index.c | 4 ++-- src/liblzma/common/stream_encoder_mt.c | 2 +- src/liblzma/common/vli_decoder.c | 2 +- src/liblzma/lz/lz_decoder.c | 2 +- src/scripts/xzgrep.in | 2 +- src/xz/args.c | 2 +- src/xz/coder.c | 4 ++-- src/xz/main.c | 2 +- src/xz/mytime.h | 2 +- src/xz/private.h | 2 +- src/xz/xz.1 | 2 +- windows/build.bash | 2 +- 19 files changed, 21 insertions(+), 21 deletions(-) commit aeb3be8ac4c4b06a745c3799b80b38159fb78b1a Author: Lasse Collin Date: 2019-03-04 22:49:04 +0200 README: Update translation instructions. XZ Utils is now part of the Translation Project . README | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) commit 0c238dc3feb0a3eea1e713feb8d338c8dfba9f74 Author: Lasse Collin Date: 2018-12-20 20:42:29 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 3ca432d9cce4bf7e793de173dd22025b68611c42 Author: Lasse Collin Date: 2018-12-14 20:34:30 +0200 xz: Fix a crash in progress indicator when in passthru mode. "xz -dcfv not_an_xz_file" crashed (all four options are required to trigger it). It caused xz to call lzma_get_progress(&strm, ...) when no coder was initialized in strm. In this situation strm.internal is NULL which leads to a crash in lzma_get_progress(). The bug was introduced when xz started using lzma_get_progress() to get progress info for multi-threaded compression, so the bug is present in versions 5.1.3alpha and higher. Thanks to Filip Palian for the bug report. src/xz/coder.c | 11 +++++++---- src/xz/message.c | 18 ++++++++++++++++-- src/xz/message.h | 3 ++- 3 files changed, 25 insertions(+), 7 deletions(-) commit fcc419e3c3f77a8b6fc5056a86b1b8abbe266e62 Author: Lasse Collin Date: 2018-11-22 17:20:31 +0200 xz: Update man page timestamp. src/xz/xz.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 5a2fc3cd0194e55df329dd29f805299aaca5f32f Author: Pavel Raiskup Date: 2018-11-22 15:14:34 +0100 'have have' typos src/xz/signals.c | 2 +- src/xz/xz.1 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 7143b04fe49390807f355b1dad686a3d8c4dbdcf Author: Lasse Collin Date: 2018-07-27 18:10:44 +0300 xzless: Rename unused variables to silence static analysers. In this particular case I don't see this affecting readability of the code. Thanks to Pavel Raiskup. src/scripts/xzless.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 273c33297bb69621045ed19665eaf8338bcf4a50 Author: Lasse Collin Date: 2018-07-27 16:02:58 +0300 liblzma: Remove an always-true condition from lzma_index_cat(). This should help static analysis tools to see that newg isn't leaked. Thanks to Pavel Raiskup. src/liblzma/common/index.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 65b4aba6d06d2cd24ba9ad01fa389c238ad8f352 Author: Lasse Collin Date: 2018-05-19 21:23:25 +0300 liblzma: Improve lzma_properties_decode() API documentation. src/liblzma/api/lzma/filter.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) commit 531e78e5a253a3e2c4d4dd1505acaccee48f4083 Author: Lasse Collin Date: 2019-05-01 16:52:36 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 905de7e93528ca5a47039e7e1e5270163f9fc67e Author: Lasse Collin Date: 2019-05-01 16:43:16 +0300 Windows: Update VS version in windows/vs2019/config.h. windows/vs2019/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 0ffd30e172fd18cc619823b2a86448bf56a67e22 Author: Julien Marrec Date: 2019-04-25 17:44:06 +0200 Windows: Upgrade solution itself windows/vs2019/xz_win.sln | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) commit c2ef96685fc7ca36311649eeb2284b9808292040 Author: Julien Marrec Date: 2019-04-25 17:40:24 +0200 Windows: Upgrade solution with VS2019 windows/vs2019/liblzma.vcxproj | 15 ++++++++------- windows/vs2019/liblzma_dll.vcxproj | 15 ++++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) commit 25fccaf00bea399d8aa026e5b8fa254ce196e6e0 Author: Julien Marrec Date: 2019-04-25 17:39:32 +0200 Windows: Duplicate windows/vs2017 before upgrading windows/vs2019/config.h | 148 ++++++++++++++ windows/vs2019/liblzma.vcxproj | 354 ++++++++++++++++++++++++++++++++++ windows/vs2019/liblzma_dll.vcxproj | 383 +++++++++++++++++++++++++++++++++++++ windows/vs2019/xz_win.sln | 48 +++++ 4 files changed, 933 insertions(+) commit 1424078d6328291c7c524b64328ce9660617cb24 Author: Lasse Collin Date: 2019-01-13 17:29:23 +0200 Windows/VS2017: Omit WindowsTargetPlatformVersion from project files. I understood that if a WTPV is specified, it's often wrong because different VS installations have different SDK version installed. Omitting the WTPV tag makes VS2017 default to Windows SDK 8.1 which often is also missing, so in any case people may need to specify the WTPV before building. But some day in the future a missing WTPV tag will start to default to the latest installed SDK which sounds reasonable: https://developercommunity.visualstudio.com/content/problem/140294/windowstargetplatformversion-makes-it-impossible-t.html Thanks to "dom". windows/INSTALL-MSVC.txt | 4 ++++ windows/vs2017/liblzma.vcxproj | 1 - windows/vs2017/liblzma_dll.vcxproj | 1 - 3 files changed, 4 insertions(+), 2 deletions(-) commit b5be61cc06088bb07f488f9baf7d447ff47b37c1 Author: Lasse Collin Date: 2018-04-29 19:00:06 +0300 Bump version and soname for 5.2.4. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit c47fa6d06745bb2e99866e76b81ac7a9c5a8bfec Author: Lasse Collin Date: 2018-04-29 18:48:00 +0300 extra/scanlzma: Fix compiler warnings. extra/scanlzma/scanlzma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit 7b350fe21aa4fd6495a3b6188a40e3f1ae7c0edf Author: Lasse Collin Date: 2018-04-29 18:15:37 +0300 Add NEWS for 5.2.4. NEWS | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) commit 5801591162a280aa52d156dfde42c531ec7fd8b6 Author: Lasse Collin Date: 2018-02-06 19:36:30 +0200 Update THANKS. THANKS | 2 ++ 1 file changed, 2 insertions(+) commit c4a616f4536146f8906e1b4412eefeec07b28fae Author: Ben Boeckel Date: 2018-01-29 13:58:18 -0500 nothrow: use noexcept for C++11 and newer In C++11, the `throw()` specifier is deprecated and `noexcept` is preffered instead. src/liblzma/api/lzma.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit 0b8947782ff3c5ef830a7f85412e44dcf3cdeb77 Author: Lasse Collin Date: 2018-02-06 18:02:48 +0200 liblzma: Remove incorrect #ifdef from range_common.h. In most cases it was harmless but it could affect some custom build systems. Thanks to Pippijn van Steenhoven. src/liblzma/rangecoder/range_common.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) commit 48f3b9f73ffea7f55d5678997aba0e79d2e82168 Author: Lasse Collin Date: 2018-01-10 22:10:39 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit a3ce3e902342be37c626a561ce3d9ffcf27d0f94 Author: Lasse Collin Date: 2018-01-10 21:54:27 +0200 tuklib_integer: New Intel C compiler needs immintrin.h. Thanks to Melanie Blower (Intel) for the patch. src/common/tuklib_integer.h | 11 +++++++++++ 1 file changed, 11 insertions(+) commit 4505ca483985f88c6923c05a43b4327feaab83b1 Author: Lasse Collin Date: 2017-09-24 20:04:24 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 1ef3cc226e3ce173575c218238b71a4eecabc470 Author: Lasse Collin Date: 2017-09-16 20:36:20 +0300 Windows: Fix paths in VS project files. Some paths use slashes instead of backslashes as directory separators... now it should work (I tested VS2013 version). windows/vs2013/liblzma.vcxproj | 12 ++++++------ windows/vs2013/liblzma_dll.vcxproj | 24 ++++++++++++------------ windows/vs2017/liblzma.vcxproj | 12 ++++++------ windows/vs2017/liblzma_dll.vcxproj | 24 ++++++++++++------------ 4 files changed, 36 insertions(+), 36 deletions(-) commit e775d2a8189d24f60470e6e49d8af881df3a1680 Author: Lasse Collin Date: 2017-09-16 12:54:23 +0300 Windows: Add project files for VS2017. These files match the v5.2 branch (no file info decoder). windows/vs2017/config.h | 148 ++++++++++++++ windows/vs2017/liblzma.vcxproj | 355 ++++++++++++++++++++++++++++++++++ windows/vs2017/liblzma_dll.vcxproj | 384 +++++++++++++++++++++++++++++++++++++ windows/vs2017/xz_win.sln | 48 +++++ 4 files changed, 935 insertions(+) commit 10e02e0fbb6e2173f8b41f6e39b7b570f47dd74d Author: Lasse Collin Date: 2017-09-16 12:39:43 +0300 Windows: Move VS2013 files into windows/vs2013 directory. windows/{ => vs2013}/config.h | 0 windows/{ => vs2013}/liblzma.vcxproj | 278 +++++++++++++++--------------- windows/{ => vs2013}/liblzma_dll.vcxproj | 280 +++++++++++++++---------------- windows/{ => vs2013}/xz_win.sln | 0 4 files changed, 279 insertions(+), 279 deletions(-) commit 06eebd4543196ded36fa9b8b9544195b38b24ef2 Author: Lasse Collin Date: 2017-08-14 20:08:33 +0300 Fix or hide warnings from GCC 7's -Wimplicit-fallthrough. src/liblzma/lzma/lzma_decoder.c | 6 ++++++ src/xz/list.c | 2 ++ 2 files changed, 8 insertions(+) commit ea4ea1dffafebaa8b2770bf3eca46900e4dd22dc Author: Alexey Tourbin Date: 2017-05-16 23:56:35 +0300 Docs: Fix a typo in a comment in doc/examples/02_decompress.c. doc/examples/02_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit eb2ef4c79bf405ea0d215f3b1df3d0eaf5e1d27b Author: Lasse Collin Date: 2017-05-23 18:34:43 +0300 xz: Fix "xz --list --robot missing_or_bad_file.xz". It ended up printing an uninitialized char-array when trying to print the check names (column 7) on the "totals" line. This also changes the column 12 (minimum xz version) to 50000002 (xz 5.0.0) instead of 0 when there are no valid input files. Thanks to kidmin for the bug report. src/xz/list.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) commit 3ea5dbd9b0d79048e336e40cef3b6d814fb74e13 Author: Lasse Collin Date: 2017-04-24 19:48:47 +0300 Build: Omit pre-5.0.0 entries from the generated ChangeLog. It makes ChangeLog significantly smaller. Makefile.am | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit bae24675936df99064de1502593c006bd902594b Author: Lasse Collin Date: 2017-04-24 19:30:22 +0300 Update the Git repository URL to HTTPS in ChangeLog. ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 70f479211973b5361f4d7cb08ba5be69b4266e7a Author: Lasse Collin Date: 2017-04-19 22:17:35 +0300 Update the home page URLs to HTTPS. COPYING | 2 +- README | 2 +- configure.ac | 2 +- doc/faq.txt | 4 ++-- dos/config.h | 2 +- src/common/common_w32res.rc | 2 +- src/xz/xz.1 | 6 +++--- src/xzdec/xzdec.1 | 4 ++-- windows/README-Windows.txt | 2 +- windows/config.h | 2 +- 10 files changed, 14 insertions(+), 14 deletions(-) commit 2a4b2fa75d06a097261a02ecd3cf2b6d449bf754 Author: Lasse Collin Date: 2017-03-30 22:01:54 +0300 xz: Use POSIX_FADV_RANDOM for in "xz --list" mode. xz --list is random access so POSIX_FADV_SEQUENTIAL was clearly wrong. src/xz/file_io.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) commit eb25743ade39170cffd9566a1aae272098cce216 Author: Lasse Collin Date: 2017-03-30 19:47:45 +0300 liblzma: Fix lzma_memlimit_set(strm, 0). The 0 got treated specially in a buggy way and as a result the function did nothing. The API doc said that 0 was supposed to return LZMA_PROG_ERROR but it didn't. Now 0 is treated as if 1 had been specified. This is done because 0 is already used to indicate an error from lzma_memlimit_get() and lzma_memusage(). In addition, lzma_memlimit_set() no longer checks that the new limit is at least LZMA_MEMUSAGE_BASE. It's counter-productive for the Index decoder and was actually needed only by the auto decoder. Auto decoder has now been modified to check for LZMA_MEMUSAGE_BASE. src/liblzma/api/lzma/base.h | 7 ++++++- src/liblzma/common/auto_decoder.c | 3 +++ src/liblzma/common/common.c | 6 ++++-- 3 files changed, 13 insertions(+), 3 deletions(-) commit ef36c6362f3f3853f21b8a6359bcd06576ebf207 Author: Lasse Collin Date: 2017-03-30 19:16:55 +0300 liblzma: Similar memlimit fix for stream_, alone_, and auto_decoder. src/liblzma/api/lzma/container.h | 21 +++++++++++++++++---- src/liblzma/common/alone_decoder.c | 5 +---- src/liblzma/common/auto_decoder.c | 5 +---- src/liblzma/common/stream_decoder.c | 5 +---- 4 files changed, 20 insertions(+), 16 deletions(-) commit 57616032650f03840480b696d7878acdd2065521 Author: Lasse Collin Date: 2017-03-30 18:58:18 +0300 liblzma: Fix handling of memlimit == 0 in lzma_index_decoder(). It returned LZMA_PROG_ERROR, which was done to avoid zero as the limit (because it's a special value elsewhere), but using LZMA_PROG_ERROR is simply inconvenient and can cause bugs. The fix/workaround is to treat 0 as if it were 1 byte. It's effectively the same thing. The only weird consequence is that then lzma_memlimit_get() will return 1 even when 0 was specified as the limit. This fixes a very rare corner case in xz --list where a specific memory usage limit and a multi-stream file could print the error message "Internal error (bug)" instead of saying that the memory usage limit is too low. src/liblzma/api/lzma/index.h | 18 +++++++++++------- src/liblzma/common/index_decoder.c | 4 ++-- 2 files changed, 13 insertions(+), 9 deletions(-) commit 3d566cd519017eee1a400e7961ff14058dfaf33c Author: Lasse Collin Date: 2016-12-30 13:26:36 +0200 Bump version and soname for 5.2.3. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 053e624fe33795e779ff736f16ce44a129c829b5 Author: Lasse Collin Date: 2016-12-30 13:25:10 +0200 Update NEWS for 5.2.3. NEWS | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) commit cae412b2b77d7fd88d187ed7659331709311f80d Author: Lasse Collin Date: 2015-04-01 14:45:25 +0300 xz: Fix the Capsicum rights on user_abort_pipe. src/xz/file_io.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit 9ccbae41000572193b9a09e7102f9e84dc6d96de Author: Lasse Collin Date: 2016-12-28 21:05:22 +0200 Mention potential sandboxing bugs in INSTALL. INSTALL | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) commit e013a337d3de77cce24360dffe956ea2339489b6 Author: Lasse Collin Date: 2016-11-21 20:24:50 +0200 liblzma: Avoid multiple definitions of lzma_coder structures. Only one definition was visible in a translation unit. It avoided a few casts and temp variables but seems that this hack doesn't work with link-time optimizations in compilers as it's not C99/C11 compliant. Fixes: http://www.mail-archive.com/xz-devel@tukaani.org/msg00279.html src/liblzma/common/alone_decoder.c | 44 +++++---- src/liblzma/common/alone_encoder.c | 34 ++++--- src/liblzma/common/auto_decoder.c | 35 ++++--- src/liblzma/common/block_decoder.c | 41 ++++---- src/liblzma/common/block_encoder.c | 40 ++++---- src/liblzma/common/common.h | 18 ++-- src/liblzma/common/index_decoder.c | 33 ++++--- src/liblzma/common/index_encoder.c | 16 ++-- src/liblzma/common/stream_decoder.c | 50 +++++----- src/liblzma/common/stream_encoder.c | 56 ++++++----- src/liblzma/common/stream_encoder_mt.c | 124 ++++++++++++++----------- src/liblzma/delta/delta_common.c | 25 ++--- src/liblzma/delta/delta_decoder.c | 6 +- src/liblzma/delta/delta_encoder.c | 12 ++- src/liblzma/delta/delta_private.h | 4 +- src/liblzma/lz/lz_decoder.c | 60 ++++++------ src/liblzma/lz/lz_decoder.h | 13 ++- src/liblzma/lz/lz_encoder.c | 57 +++++++----- src/liblzma/lz/lz_encoder.h | 9 +- src/liblzma/lzma/lzma2_decoder.c | 32 ++++--- src/liblzma/lzma/lzma2_encoder.c | 51 +++++----- src/liblzma/lzma/lzma_decoder.c | 27 +++--- src/liblzma/lzma/lzma_encoder.c | 29 +++--- src/liblzma/lzma/lzma_encoder.h | 9 +- src/liblzma/lzma/lzma_encoder_optimum_fast.c | 3 +- src/liblzma/lzma/lzma_encoder_optimum_normal.c | 23 ++--- src/liblzma/lzma/lzma_encoder_private.h | 6 +- src/liblzma/simple/arm.c | 2 +- src/liblzma/simple/armthumb.c | 2 +- src/liblzma/simple/ia64.c | 2 +- src/liblzma/simple/powerpc.c | 2 +- src/liblzma/simple/simple_coder.c | 61 ++++++------ src/liblzma/simple/simple_private.h | 12 +-- src/liblzma/simple/sparc.c | 2 +- src/liblzma/simple/x86.c | 15 +-- 35 files changed, 532 insertions(+), 423 deletions(-) commit 8e0f1af3dcaec00a3879cce8ad7441edc6359d1c Author: Lasse Collin Date: 2016-12-26 20:50:25 +0200 Document --enable-sandbox configure option in INSTALL. INSTALL | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) commit ce2542d220de06acd618fd9f5c0a6683029fb4eb Author: Lasse Collin Date: 2015-03-31 22:19:34 +0300 xz: Add support for sandboxing with Capsicum (disabled by default). In the v5.2 branch this feature is considered experimental and thus disabled by default. The sandboxing is used conditionally as described in main.c. This isn't optimal but it was much easier to implement than a full sandboxing solution and it still covers the most common use cases where xz is writing to standard output. This should have practically no effect on performance even with small files as fork() isn't needed. C and locale libraries can open files as needed. This has been fine in the past, but it's a problem with things like Capsicum. io_sandbox_enter() tries to ensure that various locale-related files have been loaded before cap_enter() is called, but it's possible that there are other similar problems which haven't been seen yet. Currently Capsicum is available on FreeBSD 10 and later and there is a port to Linux too. Thanks to Loganaden Velvindron for help. configure.ac | 41 +++++++++++++++++++++++++++ src/xz/Makefile.am | 2 +- src/xz/file_io.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/xz/file_io.h | 6 ++++ src/xz/main.c | 18 ++++++++++++ src/xz/private.h | 4 +++ 6 files changed, 151 insertions(+), 1 deletion(-) commit 3ca1d5e6320111043e19434da881065fadafa0e4 Author: Lasse Collin Date: 2015-03-31 21:12:30 +0300 Fix bugs and otherwise improve ax_check_capsicum.m4. AU_ALIAS was removed because the new version is incompatible with the old version. It no longer checks for separately. It's enough to test for it as part of AC_CHECK_DECL. The defines HAVE_CAPSICUM_SYS_CAPSICUM_H and HAVE_CAPSICUM_SYS_CAPABILITY_H were removed as unneeded. HAVE_SYS_CAPSICUM_H from AC_CHECK_HEADERS is enough. It no longer does a useless search for the Capsicum library if the header wasn't found. Fixed a bug in ACTION-IF-FOUND (the first argument). Specifying the argument omitted the default action but the given action wasn't used instead. AC_DEFINE([HAVE_CAPSICUM]) is now always called when Capsicum support is found. Previously it was part of the default ACTION-IF-FOUND which a custom action would override. Now the default action only prepends ${CAPSICUM_LIB} to LIBS. The documentation was updated. Since there as no serial number, "#serial 2" was added. m4/ax_check_capsicum.m4 | 103 ++++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 52 deletions(-) commit 5f3a742b64197fe8bedb6f05fc6ce5d177d11145 Author: Lasse Collin Date: 2015-03-31 19:20:24 +0300 Add m4/ax_check_capsicum.m4 for detecting Capsicum support. The file was loaded from this web page: https://github.com/google/capsicum-test/blob/dev/autoconf/m4/ax_check_capsicum.m4 Thanks to Loganaden Velvindron for pointing it out for me. m4/ax_check_capsicum.m4 | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) commit d74377e62b4c649e40294dd441de72c0f092e67c Author: Lasse Collin Date: 2015-10-12 20:29:09 +0300 liblzma: Fix a memory leak in error path of lzma_index_dup(). lzma_index_dup() calls index_dup_stream() which, in case of an error, calls index_stream_end() to free memory allocated by index_stream_init(). However, it illogically didn't actually free the memory. To make it logical, the tree handling code was modified a bit in addition to changing index_stream_end(). Thanks to Evan Nemerson for the bug report. src/liblzma/common/index.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) commit f580732216dcf971f3f006fe8e01cd4979e1d964 Author: Lasse Collin Date: 2016-10-24 18:53:25 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 88d7a7fd153bf1355cdf798ffdac7443d0169afc Author: Lasse Collin Date: 2016-10-24 18:51:36 +0300 tuklib_cpucores: Add support for sched_getaffinity(). It's available in glibc (GNU/Linux, GNU/kFreeBSD). It's better than sysconf(_SC_NPROCESSORS_ONLN) because sched_getaffinity() gives the number of cores available to the process instead of the total number of cores online. As a side effect, this commit fixes a bug on GNU/kFreeBSD where configure would detect the FreeBSD-specific cpuset_getaffinity() but it wouldn't actually work because on GNU/kFreeBSD it requires using -lfreebsd-glue when linking. Now the glibc-specific function will be used instead. Thanks to Sebastian Andrzej Siewior for the original patch and testing. m4/tuklib_cpucores.m4 | 30 +++++++++++++++++++++++++++++- src/common/tuklib_cpucores.c | 9 +++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) commit 51baf684376903dbeddd840582bfdf9fa91b311b Author: Lasse Collin Date: 2016-06-30 20:27:36 +0300 xz: Fix copying of timestamps on Windows. xz used to call utime() on Windows, but its result gets lost on close(). Using _futime() seems to work. Thanks to Martok for reporting the bug: http://www.mail-archive.com/xz-devel@tukaani.org/msg00261.html configure.ac | 2 +- src/xz/file_io.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) commit 1ddc479851139d6e8202e5835421bfe6578d9e07 Author: Lasse Collin Date: 2016-06-16 22:46:02 +0300 xz: Silence warnings from -Wlogical-op. Thanks to Evan Nemerson. src/xz/file_io.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) commit be647ff5ed5a1c244a65722af6ce250259f3b14a Author: Lasse Collin Date: 2016-04-10 20:55:49 +0300 Build: Fix = to += for xz_SOURCES in src/xz/Makefile.am. Thanks to Christian Kujau. src/xz/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit fb6d50c15343831f35305982cefa82053099191d Author: Lasse Collin Date: 2016-04-10 20:54:17 +0300 Build: Bump GNU Gettext version requirement to 0.19. It silences a few warnings and most people probably have 0.19 even on stable distributions. Thanks to Christian Kujau. configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 74f8dad9f912a2993768d93d108ea2b0b2c196e0 Author: Lasse Collin Date: 2016-03-13 20:21:49 +0200 liblzma: Disable external SHA-256 by default. This is the sane thing to do. The conflict with OpenSSL on some OSes and especially that the OS-provided versions can be significantly slower makes it clear that it was a mistake to have the external SHA-256 support enabled by default. Those who want it can now pass --enable-external-sha256 to configure. INSTALL was updated with notes about OSes where this can be a bad idea. The SHA-256 detection code in configure.ac had some bugs that could lead to a build failure in some situations. These were fixed, although it doesn't matter that much now that the external SHA-256 is disabled by default. MINIX >= 3.2.0 uses NetBSD's libc and thus has SHA256_Init in libc instead of libutil. Support for the libutil version was removed. INSTALL | 36 ++++++++++++++++++++++ configure.ac | 76 +++++++++++++++++++++++------------------------ src/liblzma/check/check.h | 16 ++++------ 3 files changed, 79 insertions(+), 49 deletions(-) commit ea7f6ff04cb5bb1498088eb09960a4c3f13dfe39 Author: Lasse Collin Date: 2016-03-10 20:27:05 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit d0e018016b311232e82d9a98dc68f1e3dabce794 Author: Lasse Collin Date: 2016-03-10 20:26:49 +0200 Build: Avoid SHA256_Init on FreeBSD and MINIX 3. On FreeBSD 10 and older, SHA256_Init from libmd conflicts with libcrypto from OpenSSL. The OpenSSL version has different sizeof(SHA256_CTX) and it can cause weird problems if wrong SHA256_Init gets used. Looking at the source, MINIX 3 seems to have a similar issue but I'm not sure. To be safe, I disabled SHA256_Init on MINIX 3 too. NetBSD has SHA256_Init in libc and they had a similar problem, but they already fixed it in 2009. Thanks to Jim Wilcoxson for the bug report that helped in finding the problem. configure.ac | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) commit 5daae123915f32a4ed6dc948b831533c2d1beec3 Author: Lasse Collin Date: 2015-11-08 20:16:10 +0200 tuklib_physmem: Hopefully silence a warning on Windows. src/common/tuklib_physmem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 491acc406e098167ccb7fce0728b94c2f32cff9f Author: Lasse Collin Date: 2015-11-04 23:17:43 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 8173ff8790ad3502d04e1c07d014cb84a3b8187b Author: Lasse Collin Date: 2015-11-04 23:14:00 +0200 liblzma: Make Valgrind happier with optimized (gcc -O2) liblzma. When optimizing, GCC can reorder code so that an uninitialized value gets used in a comparison, which makes Valgrind unhappy. It doesn't happen when compiled with -O0, which I tend to use when running Valgrind. Thanks to Rich Prohaska. I remember this being mentioned long ago by someone else but nothing was done back then. src/liblzma/lz/lz_encoder.c | 4 ++++ 1 file changed, 4 insertions(+) commit 013de2b5ab8094d2c82a2771f3d143eeb656eda9 Author: Lasse Collin Date: 2015-11-03 20:55:45 +0200 liblzma: Rename lzma_presets.c back to lzma_encoder_presets.c. It would be too annoying to update other build systems just because of this. src/liblzma/lzma/Makefile.inc | 2 +- src/liblzma/lzma/{lzma_presets.c => lzma_encoder_presets.c} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit a322f70ad96de88968c2c36e6a36bc08ae30bd20 Author: Lasse Collin Date: 2015-11-03 20:47:07 +0200 Build: Disable xzdec, lzmadec, and lzmainfo when they cannot be built. They all need decoder support and if that isn't available, there's no point trying to build them. configure.ac | 3 +++ 1 file changed, 3 insertions(+) commit 8ea49606cf6427e32319de7693eca9e43f1c8ad6 Author: Lasse Collin Date: 2015-11-03 20:35:19 +0200 Build: Simplify $enable_{encoders,decoders} usage a bit. configure.ac | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) commit 42131a25e52bfe400acfa7df93469a96bb78bb78 Author: Lasse Collin Date: 2015-11-03 20:31:31 +0200 Windows/MSVC: Update config.h. windows/config.h | 6 ++++++ 1 file changed, 6 insertions(+) commit e9184e87cc989d14c7413e6adb3eca98f6ae0290 Author: Lasse Collin Date: 2015-11-03 20:29:58 +0200 DOS: Update config.h. dos/config.h | 6 ++++++ 1 file changed, 6 insertions(+) commit 2296778f3c9a1e3a8699973b09dd3610b8baa402 Author: Lasse Collin Date: 2015-11-03 20:29:33 +0200 xz: Make xz buildable even when encoders or decoders are disabled. The patch is quite long but it's mostly about adding new #ifdefs to omit code when encoders or decoders have been disabled. This adds two new #defines to config.h: HAVE_ENCODERS and HAVE_DECODERS. configure.ac | 4 ++++ src/xz/Makefile.am | 8 ++++++-- src/xz/args.c | 16 ++++++++++++++++ src/xz/coder.c | 33 +++++++++++++++++++++++++-------- src/xz/main.c | 9 +++++++-- src/xz/private.h | 5 ++++- 6 files changed, 62 insertions(+), 13 deletions(-) commit 97a3109281e475d9cf1b5095237d672fa0ad25e5 Author: Lasse Collin Date: 2015-11-03 18:06:40 +0200 Build: Build LZMA1/2 presets also when only decoder is wanted. People shouldn't rely on the presets when decoding raw streams, but xz uses the presets as the starting point for raw decoder options anyway. lzma_encocder_presets.c was renamed to lzma_presets.c to make it clear it's not used solely by the encoder code. src/liblzma/lzma/Makefile.inc | 6 +++++- src/liblzma/lzma/{lzma_encoder_presets.c => lzma_presets.c} | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) commit dc6b78d7f0f6fe43e9d4215146e8581feb8090e7 Author: Lasse Collin Date: 2015-11-03 17:54:48 +0200 Build: Fix configure to handle LZMA1 dependency with LZMA2. Now it gives an error if LZMA1 encoder/decoder is missing when LZMA2 encoder/decoder was requested. Even better would be LZMA2 implicitly enabling LZMA1 but it would need more code. configure.ac | 5 ----- 1 file changed, 5 deletions(-) commit 46d76c9cd3cb26a31f5ae6c3a8bbcf38e6da1add Author: Lasse Collin Date: 2015-11-03 17:41:54 +0200 Build: Don't omit lzma_cputhreads() unless using --disable-threads. Previously it was omitted if encoders were disabled with --disable-encoders. It didn't make sense and it also broke the build. src/liblzma/common/Makefile.inc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) commit 16d68f874d89f1e4a1919786a35bbaef7d71a077 Author: Lasse Collin Date: 2015-11-02 18:16:51 +0200 liblzma: Fix a build failure related to external SHA-256 support. If an appropriate header and structure were found by configure, but a library with a usable SHA-256 functions wasn't, the build failed. src/liblzma/check/check.h | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) commit d9311647fc1ab512a3394596221ab8039c00af6b Author: Lasse Collin Date: 2015-11-02 15:19:10 +0200 xz: Always close the file before trying to delete it. unlink() can return EBUSY in errno for open files on some operating systems and file systems. src/xz/file_io.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) commit f59c4183f3c9066626ce45dc3db4642fa603fa21 Author: Lasse Collin Date: 2015-10-12 21:08:42 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 35f189673e280c12e4c5129f9f97e54eef3bbc04 Author: Lasse Collin Date: 2015-10-12 21:07:41 +0300 Tests: Add tests for the two bugs fixed in index.c. tests/test_index.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) commit e10bfdb0fcaff12f3a6dadee51e0a022aadccb51 Author: Lasse Collin Date: 2015-10-12 20:45:15 +0300 liblzma: Fix lzma_index_dup() for empty Streams. Stream Flags and Stream Padding weren't copied from empty Streams. src/liblzma/common/index.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) commit 06f434bd8980f25ca23232eb7bb7df7e37dc8448 Author: Lasse Collin Date: 2015-10-12 20:31:44 +0300 liblzma: Add a note to index.c for those using static analyzers. src/liblzma/common/index.c | 3 +++ 1 file changed, 3 insertions(+) commit 9815cdf6987ef91a85493bfcfd1ce2aaf3b47a0a Author: Lasse Collin Date: 2015-09-29 13:59:35 +0300 Bump version and soname for 5.2.2. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit cbe0cec8476bdd0416c7ca9bc83895c9bea1cf78 Author: Lasse Collin Date: 2015-09-29 13:57:28 +0300 Update NEWS for 5.2.2. NEWS | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) commit 49427ce7eececdd18bbd35dab23c81910d083e1c Author: Andre Noll Date: 2015-05-28 15:50:00 +0200 Fix typo in German translation. As pointed out by Robert Pollak, there's a typo in the German translation of the compression preset option (-0 ... -9) help text. "The compressor" translates to "der Komprimierer", and the genitive form is "des Komprimierers". The old word makes no sense at all. po/de.po | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 608d6f06c940e7f28c25de005e8b99bdff42d27c Author: Hauke Henningsen Date: 2015-08-17 04:59:54 +0200 Update German translation, mostly wrt orthography Provide an update of the German translation. * A lot of compound words were previously written with spaces, while German orthography is relatively clear in that the components should not be separated. * When referring to the actual process of (de)compression rather than the concept, replace “(De-)Kompression” with “(De-)Komprimierung”. Previously, both forms were used in this context and are now used in a manner consistent with “Komprimierung” being more likely to refer to a process. * Consistently translate “standard input”/“output” * Use “Zeichen” instead of false friend “Charakter” for “character” * Insert commas around relative clauses (as required in German) * Some other minor corrections * Capitalize “ß” as “ẞ” * Consistently start option descriptions in --help with capital letters Acked-By: Andre Noll * Update after msgmerge po/de.po | 383 ++++++++++++++++++++++++++++++++------------------------------- 1 file changed, 196 insertions(+), 187 deletions(-) commit c8988414e5b67b8ef2fe0ba7b1ccdd0ec73c60d3 Author: Lasse Collin Date: 2015-08-11 13:23:04 +0300 Build: Minor Cygwin cleanup. Some tests used "cygwin*" and some used "cygwin". I changed them all to use "cygwin". Shouldn't affect anything in practice. configure.ac | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit 85a6dfed53477906bfe9a7c0123dd412e391cb48 Author: Lasse Collin Date: 2015-08-11 13:21:52 +0300 Build: Support building of MSYS2 binaries. configure.ac | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) commit 77f270be8432df2e4516a0c48814b6976d6618c5 Author: Lasse Collin Date: 2015-08-09 21:06:26 +0300 Windows: Define DLL_EXPORT when building liblzma.dll with MSVC. src/liblzma/common/common.h uses it to set __declspec(dllexport) for the API symbols. Thanks to Adam Walling. windows/liblzma_dll.vcxproj | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) commit 8c975446c5903090a5a8493b5b96b71003056a88 Author: Lasse Collin Date: 2015-08-09 21:02:20 +0300 Windows: Omit unneeded header files from MSVC project files. windows/liblzma.vcxproj | 5 ----- windows/liblzma_dll.vcxproj | 5 ----- 2 files changed, 10 deletions(-) commit 119a00434954726ca58e4a578e6469f530fca30e Author: Lasse Collin Date: 2015-07-12 20:48:19 +0300 liblzma: A MSVC-specific hack isn't needed with MSVC 2013 and newer. src/liblzma/api/lzma.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) commit d4e7c557fcab353539c9481a8d95cb04bcb15c7c Author: Lasse Collin Date: 2015-06-19 20:38:55 +0300 Update THANKS. THANKS | 2 ++ 1 file changed, 2 insertions(+) commit 98001740ca56c894a7bd32eb47e9857a8a7d878d Author: Lasse Collin Date: 2015-06-19 20:21:30 +0300 Windows: Update the docs. INSTALL | 29 ++++++++----- windows/INSTALL-MSVC.txt | 47 ++++++++++++++++++++++ windows/{INSTALL-Windows.txt => INSTALL-MinGW.txt} | 2 +- 3 files changed, 67 insertions(+), 11 deletions(-) commit 28195e4c877007cc760ecea1d17f740693d66873 Author: Lasse Collin Date: 2015-06-19 17:25:31 +0300 Windows: Add MSVC project files for building liblzma. Thanks to Adam Walling for creating these files. windows/liblzma.vcxproj | 359 ++++++++++++++++++++++++++++++++++++++++ windows/liblzma_dll.vcxproj | 388 ++++++++++++++++++++++++++++++++++++++++++++ windows/xz_win.sln | 48 ++++++ 3 files changed, 795 insertions(+) commit 960440f3230dc628f6966d9f7614fc1b28baf44e Author: Lasse Collin Date: 2015-05-13 20:57:55 +0300 Tests: Fix a memory leak in test_bcj_exact_size. Thanks to Cristian Rodríguez. tests/test_bcj_exact_size.c | 1 + 1 file changed, 1 insertion(+) commit 68cd35acafbdcdf4e8ea8b5bb843c736939d6f8b Author: Lasse Collin Date: 2015-05-12 18:08:24 +0300 Fix NEWS about threading in 5.2.0. Thanks to Andy Hochhaus. NEWS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit ff96ed6d25786728356017a13baf8c14731b4f1e Author: Lasse Collin Date: 2015-05-11 21:26:16 +0300 xz: Document that threaded decompression hasn't been implemented yet. src/xz/xz.1 | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) commit 00d37b64a64ea8597fd2422d5187afd761ab9531 Author: Lasse Collin Date: 2015-04-20 20:20:29 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit db190a832c49ca3aed6d69cc992fa5583cae7b11 Author: Lasse Collin Date: 2015-04-20 19:59:18 +0300 Revert "xz: Use pipe2() if available." This reverts commit 7a11c4a8e5e15f13d5fa59233b3172e65428efdd. It is a problem when libc has pipe2() but the kernel is too old to have pipe2() and thus pipe2() fails. In xz it's pointless to have a fallback for non-functioning pipe2(); it's better to avoid pipe2() completely. Thanks to Michael Fox for the bug report. configure.ac | 4 ++-- src/xz/file_io.c | 9 +-------- 2 files changed, 3 insertions(+), 10 deletions(-) commit eccd8155e107c5ada03d13e7730675cdf1a44ddc Author: Lasse Collin Date: 2015-03-29 22:14:47 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 25263fd9e7a8a913395cb93d7c104cd48c2b4a00 Author: Lasse Collin Date: 2015-03-29 22:13:48 +0300 Fix the detection of installed RAM on QNX. The earlier version compiled but didn't actually work since sysconf(_SC_PHYS_PAGES) always fails (or so I was told). Thanks to Ole André Vadla Ravnås for the patch and testing. m4/tuklib_physmem.m4 | 6 +++--- src/common/tuklib_physmem.c | 14 +++++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) commit 4c544d2410903d38402221cb783ed85585b6a007 Author: Lasse Collin Date: 2015-03-27 22:39:07 +0200 Fix CPU core count detection on QNX. It tried to use sysctl() on QNX but - it broke the build because sysctl() needs -lsocket on QNX; - sysctl() doesn't work for detecting the core count on QNX even if it compiled. sysconf() works. An alternative would have been to use QNX-specific SYSPAGE_ENTRY(num_cpu) from . Thanks to Ole André Vadla Ravnås. m4/tuklib_cpucores.m4 | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) commit e0ea6737b03e83ccaff4514d00e31bb926f8f0f3 Author: Lasse Collin Date: 2015-03-07 22:05:57 +0200 xz: size_t/uint32_t cleanup in options.c. src/xz/options.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) commit 8bcca29a65335fd679c13814b70b35b68fa5daed Author: Lasse Collin Date: 2015-03-07 22:04:23 +0200 xz: Fix a comment and silence a warning in message.c. src/xz/message.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) commit f243f5f44c6b19a7c289a0ec73a03ee08364cb5b Author: Lasse Collin Date: 2015-03-07 22:01:00 +0200 liblzma: Silence more uint32_t vs. size_t warnings. src/liblzma/lz/lz_encoder.c | 2 +- src/liblzma/lzma/lzma_encoder.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 7f0a4c50f4a374c40acf4b86848f301ad1e82d34 Author: Lasse Collin Date: 2015-03-07 19:54:00 +0200 xz: Make arg_count an unsigned int to silence a warning. Actually the value of arg_count cannot exceed INT_MAX but it's nicer as an unsigned int. src/xz/args.h | 2 +- src/xz/main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit f6ec46801588b1be29c07c9db98558b521304002 Author: Lasse Collin Date: 2015-03-07 19:33:17 +0200 liblzma: Fix a warning in index.c. src/liblzma/common/index.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) commit a24518971cc621315af142dd3bb7614fab04ad27 Author: Lasse Collin Date: 2015-02-26 20:46:14 +0200 Build: Fix a CR+LF problem when running autoreconf -fi on OS/2. build-aux/version.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit dec11497a71518423b5ff0e759100cf8aadf6c7b Author: Lasse Collin Date: 2015-02-26 16:53:44 +0200 Bump version and soname for 5.2.1. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 29e39c79975ab89ee5dd671e97064534a9f3a649 Author: Lasse Collin Date: 2015-02-26 13:01:09 +0200 Update NEWS for 5.2.1. NEWS | 14 ++++++++++++++ 1 file changed, 14 insertions(+) commit 7a11c4a8e5e15f13d5fa59233b3172e65428efdd Author: Lasse Collin Date: 2015-02-22 19:38:48 +0200 xz: Use pipe2() if available. configure.ac | 4 ++-- src/xz/file_io.c | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) commit 117d962685c72682c63edc9bb765367189800202 Author: Lasse Collin Date: 2015-02-21 23:40:26 +0200 liblzma: Fix a compression-ratio regression in LZMA1/2 in fast mode. The bug was added in the commit f48fce093b07aeda95c18850f5e086d9f2383380 and thus affected 5.1.4beta and 5.2.0. Luckily the bug cannot cause data corruption or other nasty things. src/liblzma/lzma/lzma_encoder_optimum_fast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit ae984e31c167d3bc52972ec422dd1ebd5f5d5719 Author: Lasse Collin Date: 2015-02-21 23:00:19 +0200 xz: Fix the fcntl() usage when creating a pipe for the self-pipe trick. Now it reads the old flags instead of blindly setting O_NONBLOCK. The old code may have worked correctly, but this is better. src/xz/file_io.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) commit 2205bb5853098aea36a56df6f5747037175f66b4 Author: Lasse Collin Date: 2015-02-10 15:29:34 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit d935b0cdf3db440269b9d952b2b281b18f8c7b08 Author: Lasse Collin Date: 2015-02-10 15:28:30 +0200 tuklib_cpucores: Use cpuset_getaffinity() on FreeBSD if available. In FreeBSD, cpuset_getaffinity() is the preferred way to get the number of available cores. Thanks to Rui Paulo for the patch. I edited it slightly, but hopefully I didn't break anything. m4/tuklib_cpucores.m4 | 23 ++++++++++++++++++++++- src/common/tuklib_cpucores.c | 18 ++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) commit eb61bc58c20769cac4d05f363b9c0e8c9c71a560 Author: Lasse Collin Date: 2015-02-09 22:08:37 +0200 xzdiff: Make the mktemp usage compatible with FreeBSD's mktemp. Thanks to Rui Paulo for the fix. src/scripts/xzdiff.in | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) commit b9a5b6b7a29029680af733082b6a46e0fc01623a Author: Lasse Collin Date: 2015-02-03 21:45:53 +0200 Add a few casts to tuklib_integer.h to silence possible warnings. I heard that Visual Studio 2013 gave warnings without the casts. Thanks to Gabi Davar. src/common/tuklib_integer.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) commit c45757135f40e4a0de730ba5fff0100219493982 Author: Lasse Collin Date: 2015-01-26 21:24:39 +0200 liblzma: Set LZMA_MEMCMPLEN_EXTRA depending on the compare method. src/liblzma/common/memcmplen.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) commit 3c500174ed5485f550972a2a6109c361e875f069 Author: Lasse Collin Date: 2015-01-26 20:40:16 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit fec88d41e672d9e197c9442aecf02bd0dfa6d516 Author: Lasse Collin Date: 2015-01-26 20:39:28 +0200 liblzma: Silence harmless Valgrind errors. Thanks to Torsten Rupp for reporting this. I had forgotten to run Valgrind before the 5.2.0 release. src/liblzma/lz/lz_encoder.c | 6 ++++++ 1 file changed, 6 insertions(+) commit a9b45badfec0928d20a27c7176c005fa637f7d1e Author: Lasse Collin Date: 2015-01-09 21:50:19 +0200 xz: Fix comments. src/xz/file_io.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) commit 541aee6dd4aa97a809aba281475a21b641bb89e2 Author: Lasse Collin Date: 2015-01-09 21:35:06 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 4170edc914655310d2363baccf5e615e09b04911 Author: Lasse Collin Date: 2015-01-09 21:34:06 +0200 xz: Don't fail if stdout doesn't support O_NONBLOCK. This is similar to the case with stdin. Thanks to Brad Smith for the bug report and testing on OpenBSD. src/xz/file_io.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) commit 04bbc0c2843c50c8ad1cba42b937118e38b0508d Author: Lasse Collin Date: 2015-01-07 19:18:20 +0200 xz: Fix a memory leak in DOS-specific code. src/xz/file_io.c | 2 ++ 1 file changed, 2 insertions(+) commit f0f1f6c7235ffa901cf76fe18e33749e200b3eea Author: Lasse Collin Date: 2015-01-07 19:08:06 +0200 xz: Don't fail if stdin doesn't support O_NONBLOCK. It's a problem at least on OpenBSD which doesn't support O_NONBLOCK on e.g. /dev/null. I'm not surprised if it's a problem on other OSes too since this behavior is allowed in POSIX-1.2008. The code relying on this behavior was committed in June 2013 and included in 5.1.3alpha released on 2013-10-26. Clearly the development releases only get limited testing. src/xz/file_io.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) commit d2d484647d9d9d679f03c75abb0404f67069271c Author: Lasse Collin Date: 2015-01-06 20:30:15 +0200 Tests: Don't hide unexpected error messages in test_files.sh. Hiding them makes no sense since normally there's no error when testing the "good" files. With "bad" files errors are expected and then it makes sense to keep the messages hidden. tests/test_files.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit aae6a6aeda51cf94a47e39ad624728f9bee75e30 Author: Lasse Collin Date: 2014-12-30 11:17:16 +0200 Update Solaris notes in INSTALL. Mention the possible "make check" failure on Solaris in the Solaris-specific section of INSTALL. It was already in section 4.5 but it is better mention it in the OS-specific section too. INSTALL | 4 ++++ 1 file changed, 4 insertions(+) commit 7815112153178800a3521b9f31960e7cdc26cfba Author: Lasse Collin Date: 2014-12-26 12:00:05 +0200 Build: POSIX shell isn't required if scripts are disabled. INSTALL | 3 ++- configure.ac | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) commit a0cd05ee71d330b79ead6eb9222e1b24e1559d3a Author: Lasse Collin Date: 2014-12-21 20:48:37 +0200 DOS: Update Makefile. dos/Makefile | 1 + 1 file changed, 1 insertion(+) commit b85ee0905ec4ab7656d22e63519fdd3bedb21f2e Author: Lasse Collin Date: 2014-12-21 19:50:38 +0200 Windows: Fix bin_i486 to bin_i686 in build.bash. windows/build.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit cbafa710918195dbba3db02c3fab4f0538235206 Author: Lasse Collin Date: 2014-12-21 18:58:44 +0200 Docs: Use lzma_cputhreads() in 04_compress_easy_mt.c. doc/examples/04_compress_easy_mt.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) commit 8dbb57238d372c7263cfeb3e7f7fd9a73173156a Author: Lasse Collin Date: 2014-12-21 18:56:44 +0200 Docs: Update docs/examples/00_README.txt. doc/examples/00_README.txt | 4 ++++ 1 file changed, 4 insertions(+) commit 6060f7dc76fd6c2a8a1f8e85d0e4d86bb78273e6 Author: Lasse Collin Date: 2014-12-21 18:11:17 +0200 Bump version and soname for 5.2.0. I know that soname != app version, but I skip AGE=1 in -version-info to make the soname match the liblzma version anyway. It doesn't hurt anything as long as it doesn't conflict with library versioning rules. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 6 +++--- src/liblzma/liblzma.map | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) commit 3e8bd1d15e417f2d588e9be50ce027ee3d48b2da Author: Lasse Collin Date: 2014-12-21 18:05:03 +0200 Avoid variable-length arrays in the debug programs. debug/full_flush.c | 3 ++- debug/sync_flush.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) commit 72f7307cfdceb941aeb2bf30d424cc0d13621786 Author: Lasse Collin Date: 2014-12-21 18:01:45 +0200 Build: Include 04_compress_easy_mt.c in the tarball. Makefile.am | 1 + 1 file changed, 1 insertion(+) commit 2cb82ff21c62def11f3683a8bb0aaf363102aaa0 Author: Lasse Collin Date: 2014-12-21 18:00:38 +0200 Fix build when --disable-threads is used. src/common/mythread.h | 2 ++ 1 file changed, 2 insertions(+) commit 9b9e3536e458ef958f66b0e8982efc9d36de4d17 Author: Adrien Nader Date: 2014-12-21 15:56:15 +0100 po/fr: improve wording for help for --lzma1/--lzma2. po/fr.po | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit a8b6b569e7fadbf5b5b9139d53bc764015c15027 Author: Adrien Nader Date: 2014-12-21 15:55:48 +0100 po/fr: missing line in translation of --extreme. po/fr.po | 1 + 1 file changed, 1 insertion(+) commit f168a6fd1a888cf4f0caaddcafcb21dadc6ab6e9 Author: Lasse Collin Date: 2014-12-21 14:32:33 +0200 Update NEWS for 5.2.0. NEWS | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) commit cec2ee863b3a88f4bf039cb00f73c4a4fc93a429 Author: Lasse Collin Date: 2014-12-21 14:32:22 +0200 Update NEWS for 5.0.8. NEWS | 12 ++++++++++++ 1 file changed, 12 insertions(+) commit 42e97a32649bf53ce43be2258b902a417c6e7fa1 Author: Lasse Collin Date: 2014-12-21 14:07:54 +0200 xz: Fix a comment. src/xz/options.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 29b95d5d6665cedffa6a9d6d3d914f981e852182 Author: Lasse Collin Date: 2014-12-20 20:43:14 +0200 Update INSTALL about the dependencies of the scripts. INSTALL | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) commit 3af91040bb42c21afbb81f5568c3313125e61192 Author: Lasse Collin Date: 2014-12-20 20:42:33 +0200 Windows: Update build instructions. INSTALL | 15 +++++++++------ windows/INSTALL-Windows.txt | 44 +++++++++++++++++++++----------------------- 2 files changed, 30 insertions(+), 29 deletions(-) commit 0152f72bf6289d744823dc6c849538f3a139ad70 Author: Lasse Collin Date: 2014-12-20 20:41:48 +0200 Windows: Update the build script and README-Windows.txt. The 32-bit build is now for i686 or newer because the prebuilt MinGW-w64 toolchains include i686 code in the executables even if one uses -march=i486. The build script builds 32-bit SSE2 enabled version too. Run-time detection of SSE2 support would be nice (on any OS) but it's not implemented in XZ Utils yet. windows/README-Windows.txt | 30 ++++++++++++++++-------------- windows/build.bash | 23 ++++++++++++++--------- 2 files changed, 30 insertions(+), 23 deletions(-) commit 4a1f6133ee5533cee8d91e06fcc22443e5f1881a Author: Lasse Collin Date: 2014-12-19 15:51:50 +0200 Windows: Define TUKLIB_SYMBOL_PREFIX in config.h. It is to keep all symbols in the lzma_ namespace. windows/config.h | 3 +++ 1 file changed, 3 insertions(+) commit 7f7d093de79eee0c7dbfd7433647e46302f19f82 Author: Lasse Collin Date: 2014-12-16 21:00:09 +0200 xz: Update the man page about --threads. src/xz/xz.1 | 5 ----- 1 file changed, 5 deletions(-) commit 009823448b82aa5f465668878a544c5842885407 Author: Lasse Collin Date: 2014-12-16 20:57:43 +0200 xz: Update the man page about --block-size. src/xz/xz.1 | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) commit 7dddfbeb499e528940bc12047355c184644aafe9 Author: Adrien Nader Date: 2014-12-10 22:26:57 +0100 po/fr: several more translation updates: reword and handle --ignore-check. po/fr.po | 50 ++++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 24 deletions(-) commit 6eca5be40e04ddc4b738d493e4e56835956d8b69 Author: Adrien Nader Date: 2014-12-10 22:23:01 +0100 po/fr: yet another place where my email address had to be updated. po/fr.po | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit d1003673e92ba47edd6aeeb3dbea05c18269d0e7 Author: Adrien Nader Date: 2014-12-10 22:22:20 +0100 po/fr: fix several typos that have been around since the beginning. po/fr.po | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) commit 4c5aa911a0df027e46171e368debc543d2fa72b2 Author: Adrien Nader Date: 2014-12-03 20:02:31 +0100 po/fr: last batch of new translations for now. Four new error messages. po/fr.po | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) commit 3e3099e36d27059499e7996fb38a62e8ab01d356 Author: Adrien Nader Date: 2014-12-03 20:01:32 +0100 po/fr: translations for --threads, --block-size and --block-list. po/fr.po | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) commit e7d96a5933eec4e9d4a62569ee88df0ebb0f1d53 Author: Adrien Nader Date: 2014-12-03 20:00:53 +0100 po/fr: remove fuzzy marker for error messages that will be kept in English. The following is a copy of a comment inside fr.po: Note from translator on "file status flags". The following entry is kept un-translated on purpose. It is difficult to translate and should only happen in exceptional circumstances which means that translating would: - lose some of the meaning - make it more difficult to look up in search engines; it might happen one in a million times, if we dilute the error message in 20 languages, it will be almost impossible to find an explanation and support for the error. po/fr.po | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) commit 46cbb9033af8a21fafe543302d6919746e0d72af Author: Adrien Nader Date: 2014-12-03 19:58:25 +0100 po/fr: several minor updates and better wording. Meaning doesn't change at all: it's only for better wording and/or formatting of a few strings. po/fr.po | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) commit 7ce49d444f04e73145f79c832eb4d510594b074a Author: Adrien Nader Date: 2014-12-03 19:56:12 +0100 po/fr: update my email address and copyright years. po/fr.po | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 214c553ebc3047cd720da1ce5c80cf7c38118d3c Author: Adrien Nader Date: 2014-11-26 10:08:26 +0100 fr.po: commit file after only "update-po" so actual is readable. po/fr.po | 311 ++++++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 199 insertions(+), 112 deletions(-) commit 1190c641af09cde85f8bd0fbe5c4906f4a29431b Author: Lasse Collin Date: 2014-12-02 20:04:07 +0200 liblzma: Document how lzma_mt.block_size affects memory usage. src/liblzma/api/lzma/container.h | 4 ++++ 1 file changed, 4 insertions(+) commit e4fc1d2f9571fba79ce383595be2ea2a9257def0 Author: Lasse Collin Date: 2014-11-28 20:07:18 +0200 Update INSTALL about a "make check" failure in test_scripts.sh. INSTALL | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) commit 34f9e40a0a0c3bd2c2730cdb9cd550bbb8a3f2fe Author: Lasse Collin Date: 2014-11-26 20:12:27 +0200 Remove LZMA_UNSTABLE macro. src/liblzma/api/lzma/container.h | 4 ---- src/liblzma/common/common.h | 2 -- src/xz/private.h | 1 - 3 files changed, 7 deletions(-) commit 6d9c0ce9f2677b159e32b224aba5b535b304a705 Author: Lasse Collin Date: 2014-11-26 20:10:33 +0200 liblzma: Update lzma_stream_encoder_mt() API docs. src/liblzma/api/lzma/container.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) commit 2301f3f05dd9742f42cda8f0f318864f5dc39ab3 Author: Lasse Collin Date: 2014-11-25 12:32:05 +0200 liblzma: Verify the filter chain in threaded encoder initialization. This way an invalid filter chain is detected at the Stream encoder initialization instead of delaying it to the first call to lzma_code() which triggers the initialization of the actual filter encoder(s). src/liblzma/common/stream_encoder_mt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) commit 107a263d5bb63cd3593fd6a5c938706539f84523 Author: Lasse Collin Date: 2014-11-17 19:11:49 +0200 Build: Update m4/ax_pthread.m4 from Autoconf Archive. m4/ax_pthread.m4 | 71 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 24 deletions(-) commit b13a781833399ff5726cfc997f3cb2f0acbdbf31 Author: Lasse Collin Date: 2014-11-17 18:52:21 +0200 Build: Replace obsolete AC_HELP_STRING with AS_HELP_STRING. configure.ac | 36 ++++++++++++++++++------------------ m4/tuklib_integer.m4 | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) commit 542cac122ed3550148a2af0033af22b757491378 Author: Lasse Collin Date: 2014-11-17 18:43:19 +0200 Build: Fix Autoconf warnings about escaped backquotes. Thanks to Daniel Richard G. for pointing out that it's good to sometimes run autoreconf -fi with -Wall. configure.ac | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) commit 7b03a15cea8cd4f19ed680b51c4bcbae3ce4142f Author: Lasse Collin Date: 2014-11-10 18:54:40 +0200 xzdiff: Use mkdir if mktemp isn't available. src/scripts/xzdiff.in | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) commit f8c13e5e3609581d5dd9f8777985ca07f2390ad7 Author: Lasse Collin Date: 2014-11-10 18:45:01 +0200 xzdiff: Create a temporary directory to hold a temporary file. This avoids the possibility of "File name too long" when creating a temp file when the input file name is very long. This also means that other users on the system can no longer see the input file names in /tmp (or whatever $TMPDIR is) since the temporary directory will have a generic name. This usually doesn't matter since on many systems one can see the arguments given to all processes anyway. The number X chars to mktemp where increased from 6 to 10. Note that with some shells temp files or dirs won't be used at all. src/scripts/xzdiff.in | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) commit 7716dcf9df7f457500cb657314e7a9aea5fedb06 Author: Lasse Collin Date: 2014-11-10 15:38:47 +0200 liblzma: Fix lzma_mt.preset in lzma_stream_encoder_mt_memusage(). It read the filter chain from a wrong variable. This is a similar bug that was fixed in 9494fb6d0ff41c585326f00aa8f7fe58f8106a5e. src/liblzma/common/stream_encoder_mt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) commit 230fa4a605542c84b4178a57381695a0af4e779b Author: Lasse Collin Date: 2014-11-10 14:49:55 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 4e4ae08bc7c1711e399c9f2d26eb375d39d08101 Author: Lasse Collin Date: 2014-10-29 21:28:25 +0200 Update .gitignore files. .gitignore | 2 ++ m4/.gitignore | 3 +++ 2 files changed, 5 insertions(+) commit c923b140b27d1a055db6284e10fd546ad1a7fcdb Author: Lasse Collin Date: 2014-10-29 21:15:35 +0200 Build: Prepare to support Automake's subdir-objects. Due to a bug in Automake, subdir-objects won't be enabled for now. http://debbugs.gnu.org/cgi/bugreport.cgi?bug=17354 Thanks to Daniel Richard G. for the original patches. configure.ac | 7 ++++++- src/Makefile.am | 22 +++++++++++++++++++++- src/liblzma/Makefile.am | 4 ++-- src/lzmainfo/Makefile.am | 4 ++-- src/xz/Makefile.am | 10 +++++----- src/xzdec/Makefile.am | 8 ++++---- 6 files changed, 40 insertions(+), 15 deletions(-) commit 08c2aa16bea0df82828f665d51fba2e0a5e8997f Author: Lasse Collin Date: 2014-10-24 20:09:29 +0300 Translations: Update the Italian translation. Thanks to Milo Casagrande. po/it.po | 452 ++++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 275 insertions(+), 177 deletions(-) commit 2f9f61aa83539c54ff6c118a2693890f0519b3dd Author: Lasse Collin Date: 2014-10-18 18:51:45 +0300 Translations: Update the Polish translation. Thanks to Jakub Bogusz. po/pl.po | 332 ++++++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 214 insertions(+), 118 deletions(-) commit 4f9d233f67aea25e532824d11b7642cf7dee7a76 Author: Andre Noll Date: 2014-10-14 17:30:30 +0200 l10n: de.po: Change translator email address. Although the old address is still working, the new one should be preferred. So this commit changes all three places in de.po accordingly. Signed-off-by: Andre Noll po/de.po | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit 00502b2bedad43f0cc167ac17ae0608837ee196b Author: Andre Noll Date: 2014-10-14 17:30:29 +0200 l10n: de.po: Update German translation Signed-off-by: Andre Noll po/de.po | 531 +++++++++++++++++++++++++++++++++------------------------------ 1 file changed, 281 insertions(+), 250 deletions(-) commit 706b0496753fb609e69f1570ec603f11162189d1 Author: Andre Noll Date: 2014-10-14 17:30:28 +0200 l10n: de.po: Fix typo: Schießen -> Schließen. That's a funny one since "schießen" means to shoot :) Signed-off-by: Andre Noll po/de.po | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 7c32e6a935c3d7ee366abad1679bd5f322f0c7d4 Author: Lasse Collin Date: 2014-10-09 19:42:26 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 076258cc458f1e705041ac7a729b15ffe8c5214a Author: Lasse Collin Date: 2014-10-09 19:41:51 +0300 Add support for AmigaOS/AROS to tuklib_physmem(). Thanks to Fredrik Wikstrom. m4/tuklib_physmem.m4 | 3 ++- src/common/tuklib_physmem.c | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) commit efa7b0a210e1baa8e128fc98c5443a944c39ad24 Author: Lasse Collin Date: 2014-10-09 18:42:14 +0300 xzgrep: Avoid passing both -q and -l to grep. The behavior of grep -ql varies: - GNU grep behaves like grep -q. - OpenBSD grep behaves like grep -l. POSIX doesn't make it 100 % clear what behavior is expected. Anyway, using both -q and -l at the same time makes no sense so both options simply should never be used at the same time. Thanks to Christian Weisgerber. src/scripts/xzgrep.in | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) commit 9c5f76098c9986b48d2fc574a0b764f4cde0c538 Author: Trần Ngọc Quân Date: 2014-09-25 09:22:45 +0700 l10n: vi.po: Update Vietnamese translation Signed-off-by: Trần Ngọc Quân po/vi.po | 136 +++++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 84 insertions(+), 52 deletions(-) commit c4911f2db36d811896c73c008b4218d8fa9a4730 Author: Lasse Collin Date: 2014-09-25 18:38:48 +0300 Build: Detect supported compiler warning flags better. Clang and nowadays also GCC accept any -Wfoobar option but then may give a warning that an unknown warning option was specified. To avoid adding unsupported warning options, the options are now tested with -Werror. Thanks to Charles Diza. configure.ac | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) commit 76e75522ed6f5c228d55587dee5a997893f6e474 Author: Lasse Collin Date: 2014-09-20 21:01:21 +0300 Update NEWS for 5.0.7. NEWS | 11 +++++++++++ 1 file changed, 11 insertions(+) commit d62028b4c1174fc67b6929f126f5eb24c018c700 Author: Lasse Collin Date: 2014-09-20 19:42:56 +0300 liblzma: Fix a portability problem in Makefile.am. POSIX supports $< only in inference rules (suffix rules). Using it elsewhere is a GNU make extension and doesn't work e.g. with OpenBSD make. Thanks to Christian Weisgerber for the patch. src/liblzma/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit c35de31d4283edad3e57d37ffe939406542cb7bb Author: Lasse Collin Date: 2014-09-14 21:54:09 +0300 Bump the version number to 5.1.4beta. src/liblzma/api/lzma/version.h | 4 ++-- src/liblzma/liblzma.map | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) commit e9e097e22cacdaa23e5414fea7913535449cb340 Author: Lasse Collin Date: 2014-09-14 21:50:13 +0300 Update NEWS for 5.0.6 and 5.1.4beta. NEWS | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) commit 642f856bb8562ab66704b1e01ac7bc08b6d0a663 Author: Lasse Collin Date: 2014-09-14 21:02:41 +0300 Update TODO. TODO | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) commit 6b5e3b9eff5b8cedb2aac5f524d4d60fc8a48124 Author: Lasse Collin Date: 2014-08-05 22:32:36 +0300 xz: Add --ignore-check. src/xz/args.c | 7 +++++++ src/xz/args.h | 1 + src/xz/coder.c | 10 +++++++++- src/xz/message.c | 2 ++ src/xz/xz.1 | 19 +++++++++++++++++++ 5 files changed, 38 insertions(+), 1 deletion(-) commit 9adbc2ff373f979c917cdfd3679ce0ebd59f1040 Author: Lasse Collin Date: 2014-08-05 22:15:07 +0300 liblzma: Add support for LZMA_IGNORE_CHECK. src/liblzma/api/lzma/container.h | 24 ++++++++++++++++++++++++ src/liblzma/common/common.h | 1 + src/liblzma/common/stream_decoder.c | 14 ++++++++++++-- 3 files changed, 37 insertions(+), 2 deletions(-) commit 0e0f34b8e4f1c60ecaec15c2105982381cc9c3e6 Author: Lasse Collin Date: 2014-08-05 22:03:30 +0300 liblzma: Add support for lzma_block.ignore_check. Note that this slightly changes how lzma_block_header_decode() has been documented. Earlier it said that the .version is set to the lowest required value, but now it says that the .version field is kept unchanged if possible. In practice this doesn't affect any old code, because before this commit the only possible .version was 0. src/liblzma/api/lzma/block.h | 50 ++++++++++++++++++++++++------- src/liblzma/common/block_buffer_encoder.c | 2 +- src/liblzma/common/block_decoder.c | 18 ++++++++--- src/liblzma/common/block_encoder.c | 2 +- src/liblzma/common/block_header_decoder.c | 12 ++++++-- src/liblzma/common/block_header_encoder.c | 2 +- src/liblzma/common/block_util.c | 2 +- 7 files changed, 68 insertions(+), 20 deletions(-) commit 71e1437ab585b46f7a25f5a131557d3d1c0cbaa2 Author: Lasse Collin Date: 2014-08-04 19:25:58 +0300 liblzma: Use lzma_memcmplen() in the BT3 match finder. I had missed this when writing the commit 5db75054e900fa06ef5ade5f2c21dffdd5d16141. Thanks to Jun I Jin. src/liblzma/lz/lz_encoder_mf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) commit 41dc9ea06e1414ebe8ef52afc8fc15b6e3282b04 Author: Lasse Collin Date: 2014-08-04 00:25:44 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 5dcffdbcc23a68abc3ac3539b30be71bc9b5af84 Author: Lasse Collin Date: 2014-08-03 21:32:25 +0300 liblzma: SHA-256: Optimize the Maj macro slightly. The Maj macro is used where multiple things are added together, so making Maj a sum of two expressions allows some extra freedom for the compiler to schedule the instructions. I learned this trick from . src/liblzma/check/sha256.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit a9477d1e0c6fd0e47e637d051e7b9e2a5d9af517 Author: Lasse Collin Date: 2014-08-03 21:08:12 +0300 liblzma: SHA-256: Optimize the way rotations are done. This looks weird because the rotations become sequential, but it helps quite a bit on both 32-bit and 64-bit x86: - It requires fewer instructions on two-operand instruction sets like x86. - It requires one register less which matters especially on 32-bit x86. I hope this doesn't hurt other archs. I didn't invent this idea myself, but I don't remember where I saw it first. src/liblzma/check/sha256.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) commit 5a76c7c8ee9a0afbeedb1c211db9224260404347 Author: Lasse Collin Date: 2014-08-03 20:38:13 +0300 liblzma: SHA-256: Remove the GCC #pragma that became unneeded. The unrolling in the previous commit should avoid the situation where a compiler may think that an uninitialized variable might be accessed. src/liblzma/check/sha256.c | 5 ----- 1 file changed, 5 deletions(-) commit 9a096f8e57509775c331950b8351bbca77bdcfa8 Author: Lasse Collin Date: 2014-08-03 20:33:38 +0300 liblzma: SHA-256: Unroll a little more. This way a branch isn't needed for each operation to choose between blk0 and blk2, and still the code doesn't grow as much as it would with full unrolling. src/liblzma/check/sha256.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) commit bc7650d87bf27f85f1a2a806dc2db1780e09e6a5 Author: Lasse Collin Date: 2014-08-03 19:56:43 +0300 liblzma: SHA-256: Do the byteswapping without a temporary buffer. src/liblzma/check/sha256.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) commit 544aaa3d13554e8640f9caf7db717a96360ec0f6 Author: Lasse Collin Date: 2014-07-25 22:38:28 +0300 liblzma: Use lzma_memcmplen() in normal mode of LZMA. Two locations were not changed yet because the simplest change assumes that the initial "len" may be greater than "limit". src/liblzma/lzma/lzma_encoder_optimum_normal.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) commit f48fce093b07aeda95c18850f5e086d9f2383380 Author: Lasse Collin Date: 2014-07-25 22:30:38 +0300 liblzma: Simplify LZMA fast mode code by using memcmp(). src/liblzma/lzma/lzma_encoder_optimum_fast.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) commit 6bf5308e34e23dede5b301b1b9b4f131dacd9218 Author: Lasse Collin Date: 2014-07-25 22:29:49 +0300 liblzma: Use lzma_memcmplen() in fast mode of LZMA. src/liblzma/lzma/lzma_encoder_optimum_fast.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit 353212137e51e45b105a3a3fc2e6879f1cf0d492 Author: Lasse Collin Date: 2014-07-25 21:16:23 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 5db75054e900fa06ef5ade5f2c21dffdd5d16141 Author: Lasse Collin Date: 2014-07-25 21:15:07 +0300 liblzma: Use lzma_memcmplen() in the match finders. This doesn't change the match finder output. src/liblzma/lz/lz_encoder.c | 13 ++++++++++++- src/liblzma/lz/lz_encoder_mf.c | 33 +++++++++++---------------------- 2 files changed, 23 insertions(+), 23 deletions(-) commit e1c8f1d01f4a4e2136173edab2dc63c71ef038f4 Author: Lasse Collin Date: 2014-07-25 20:57:20 +0300 liblzma: Add lzma_memcmplen() for fast memory comparison. This commit just adds the function. Its uses will be in separate commits. This hasn't been tested much yet and it's perhaps a bit early to commit it but if there are bugs they should get found quite quickly. Thanks to Jun I Jin from Intel for help and for pointing out that string comparison needs to be optimized in liblzma. configure.ac | 13 +++ src/liblzma/common/Makefile.inc | 1 + src/liblzma/common/memcmplen.h | 170 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 184 insertions(+) commit 765735cf52e5123586e74a51b9c073b5257f631f Author: Lasse Collin Date: 2014-07-12 21:10:09 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 59da01785ef66c7e62f36e70ca808fd2824bb995 Author: Lasse Collin Date: 2014-07-12 20:06:08 +0300 Translations: Add Vietnamese translation. Thanks to Trần Ngọc Quân. po/LINGUAS | 1 + po/vi.po | 1007 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1008 insertions(+) commit 17215f751c354852700e7f8592ccf319570a0721 Author: Lasse Collin Date: 2014-06-29 20:54:14 +0300 xz: Update the help message of a few options. Updated: --threads, --block-size, and --block-list Added: --flush-timeout src/xz/message.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) commit 96864a6ddf91ad693d102ea165f3d7918744d582 Author: Lasse Collin Date: 2014-06-18 22:07:06 +0300 xz: Use lzma_cputhreads() instead of own copy of tuklib_cpucores(). src/xz/Makefile.am | 1 - src/xz/hardware.c | 12 +++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) commit a115cc3748482e277f42a968baa3cd266f031dba Author: Lasse Collin Date: 2014-06-18 22:04:24 +0300 liblzma: Add lzma_cputhreads(). src/liblzma/Makefile.am | 8 +++++++- src/liblzma/api/lzma/hardware.h | 14 ++++++++++++++ src/liblzma/common/Makefile.inc | 1 + src/liblzma/common/hardware_cputhreads.c | 22 ++++++++++++++++++++++ src/liblzma/liblzma.map | 1 + 5 files changed, 45 insertions(+), 1 deletion(-) commit 3ce3e7976904fbab4e6482bafa442856f77a51fa Author: Lasse Collin Date: 2014-06-18 19:11:52 +0300 xz: Check for filter chain compatibility for --flush-timeout. This avoids LZMA_PROG_ERROR from lzma_code() with filter chains that don't support LZMA_SYNC_FLUSH. src/xz/coder.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) commit 381ac14ed79e5d38809f251705be8b3193bba417 Author: Lasse Collin Date: 2014-06-13 19:21:54 +0300 xzgrep: List xzgrep_expected_output in tests/Makefile.am. tests/Makefile.am | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 4244b65b06d5ecaf6f9dd0387ac7e3166bd2364e Author: Lasse Collin Date: 2014-06-13 18:58:22 +0300 xzgrep: Improve the test script. Now it should be close to the functionality of the original version by Pavel Raiskup. tests/Makefile.am | 3 ++- tests/test_scripts.sh | 24 ++++++++++++++---------- tests/xzgrep_expected_output | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 11 deletions(-) commit 1e60f2c0a0ee6c18b02943ce56214799a70aac26 Author: Lasse Collin Date: 2014-06-11 21:03:25 +0300 xzgrep: Add a test for the previous fix. This is a simplified version of Pavel Raiskup's original patch. tests/test_scripts.sh | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) commit ceca37901783988204caaf40dff4623d535cc789 Author: Lasse Collin Date: 2014-06-11 20:43:28 +0300 xzgrep: exit 0 when at least one file matches. Mimic the original grep behavior and return exit_success when at least one xz compressed file matches given pattern. Original bugreport: https://bugzilla.redhat.com/show_bug.cgi?id=1108085 Thanks to Pavel Raiskup for the patch. src/scripts/xzgrep.in | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) commit 8c19216baccb92d011694590df8a1262da2e980c Author: Lasse Collin Date: 2014-06-09 21:21:24 +0300 xz: Force single-threaded mode when --flush-timeout is used. src/xz/coder.c | 11 +++++++++++ 1 file changed, 11 insertions(+) commit 87f1a24810805187d7bbc8ac5512e7eec307ddf5 Author: Lasse Collin Date: 2014-05-25 22:05:39 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit da1718f266fcfc091e7bf08aae1bc986d0e6cc6b Author: Lasse Collin Date: 2014-05-25 21:45:56 +0300 liblzma: Use lzma_alloc_zero() in LZ encoder initialization. This avoids a memzero() call for a newly-allocated memory, which can be expensive when encoding small streams with an over-sized dictionary. To avoid using lzma_alloc_zero() for memory that doesn't need to be zeroed, lzma_mf.son is now allocated separately, which requires handling it separately in normalize() too. Thanks to Vincenzo Innocente for reporting the problem. src/liblzma/lz/lz_encoder.c | 84 ++++++++++++++++++++++-------------------- src/liblzma/lz/lz_encoder.h | 2 +- src/liblzma/lz/lz_encoder_mf.c | 31 +++++++++------- 3 files changed, 62 insertions(+), 55 deletions(-) commit 28af24e9cf2eb259997c85dce13d4c97b3daa47a Author: Lasse Collin Date: 2014-05-25 19:25:57 +0300 liblzma: Add the internal function lzma_alloc_zero(). src/liblzma/common/common.c | 21 +++++++++++++++++++++ src/liblzma/common/common.h | 6 ++++++ 2 files changed, 27 insertions(+) commit ed9ac85822c490e34b68c259afa0b385d21d1c40 Author: Lasse Collin Date: 2014-05-08 18:03:09 +0300 xz: Fix uint64_t vs. size_t which broke 32-bit build. Thanks to Christian Hesse. src/xz/coder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit d716acdae3fa7996f9e68a7bac012e6d8d13dd02 Author: Lasse Collin Date: 2014-05-04 11:09:11 +0300 Docs: Update comments to refer to lzma/lzma12.h in example programs. doc/examples/03_compress_custom.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit 4d5b7b3fda31241ca86ed35e08e73f776ee916e0 Author: Lasse Collin Date: 2014-05-04 11:07:17 +0300 liblzma: Rename the private API header lzma/lzma.h to lzma/lzma12.h. It can be confusing that two header files have the same name. The public API file is still lzma.h. src/liblzma/api/Makefile.am | 2 +- src/liblzma/api/lzma.h | 2 +- src/liblzma/api/lzma/{lzma.h => lzma12.h} | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) commit 1555a9c5664afc7893a2b75e9970105437f01ef1 Author: Lasse Collin Date: 2014-04-25 17:53:42 +0300 Build: Fix the combination of --disable-xzdec --enable-lzmadec. In this case "make install" could fail if the man page directory didn't already exist at the destination. If it did exist, a dangling symlink was created there. Now the link is omitted instead. This isn't the best fix but it's better than the old behavior. src/xzdec/Makefile.am | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) commit 56056571df3377eaa6ae6233b3ccc5d72e81d43d Author: Lasse Collin Date: 2014-04-25 17:44:26 +0300 Build: Add --disable-doc to configure. INSTALL | 6 ++++++ Makefile.am | 2 ++ configure.ac | 6 ++++++ 3 files changed, 14 insertions(+) commit 6de61d8721097a6214810841aa85b08e303ac538 Author: Lasse Collin Date: 2014-04-24 18:06:24 +0300 Update INSTALL. Add a note about failing "make check". The source of the problem should be fixed in libtool (if it really is a libtool bug and not mine) but I'm unable to spend time on that for now. Thanks to Nelson H. F. Beebe for reporting the issue. Add a note about a possible need to run "ldconfig" after "make install". INSTALL | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) commit 54df428799a8d853639b753d0e6784694d73eb3e Author: Lasse Collin Date: 2014-04-09 17:26:10 +0300 xz: Rename a variable to avoid a namespace collision on Solaris. I don't know the details but I have an impression that there's no problem in practice if using GCC since people have built xz with GCC (without patching xz), but renaming the variable cannot hurt either. Thanks to Mark Ashley. src/xz/signals.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) commit 5876ca27daa1429676b1160007d9688266907f00 Author: Lasse Collin Date: 2014-01-29 20:19:41 +0200 Docs: Add example program for threaded encoding. I didn't add -DLZMA_UNSTABLE to Makefile so one has to specify it manually as long as LZMA_UNSTABLE is needed. doc/examples/04_compress_easy_mt.c | 184 +++++++++++++++++++++++++++++++++++++ doc/examples/Makefile | 3 +- 2 files changed, 186 insertions(+), 1 deletion(-) commit 9494fb6d0ff41c585326f00aa8f7fe58f8106a5e Author: Lasse Collin Date: 2014-01-29 20:13:51 +0200 liblzma: Fix lzma_mt.preset not working with lzma_stream_encoder_mt(). It read the filter chain from a wrong variable. src/liblzma/common/stream_encoder_mt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 673a4cb53de3a715685cb1b836da57a3c7dcd43c Author: Lasse Collin Date: 2014-01-20 11:20:40 +0200 liblzma: Fix typo in a comment. src/liblzma/api/lzma/block.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit ad96a871a1470eb76d6233d3890ce9338047b7a3 Author: Lasse Collin Date: 2014-01-12 19:38:43 +0200 Windows: Add config.h for building liblzma with MSVC 2013. This is for building liblzma. Building xz tool too requires a little more work. Maybe it will be supported, but for most MSVC users it's enough to be able to build liblzma. C99 support in MSVC 2013 is almost usable which is a big improvement over earlier versions. It's "almost" because there's a dumb bug that breaks mixed declarations after an "if" statements unless the "if" statement uses braces: https://connect.microsoft.com/VisualStudio/feedback/details/808650/visual-studio-2013-c99-compiler-bug https://connect.microsoft.com/VisualStudio/feedback/details/808472/c99-support-of-mixed-declarations-and-statements-fails-with-certain-types-and-constructs Hopefully it will get fixed. Then liblzma should be compilable with MSVC 2013 without patching. windows/config.h | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) commit 3d5c090872fab4212b57c290e8ed4d02c78c1737 Author: Lasse Collin Date: 2014-01-12 17:41:14 +0200 xz: Fix a comment. src/xz/coder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 69fd4e1c932c7975476a0143c86e45d81b60d3f9 Author: Lasse Collin Date: 2014-01-12 17:04:33 +0200 Windows: Add MSVC defines for inline and restrict keywords. src/common/sysdefs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) commit a19d9e8575ee6647cd9154cf1f20203f1330485f Author: Lasse Collin Date: 2014-01-12 16:44:52 +0200 liblzma: Avoid C99 compound literal arrays. MSVC 2013 doesn't like them. Maybe they aren't so good for readability either since many aren't used to them. src/liblzma/lzma/lzma_encoder_presets.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) commit e28528f1c867b2ed4ac91195ad08efb9bb8a6263 Author: Lasse Collin Date: 2014-01-12 12:50:30 +0200 liblzma: Remove a useless C99ism from sha256.c. Unsurprisingly it makes no difference in compiled output. src/liblzma/check/sha256.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 5ad1effc45adfb7dabc9a98e79736077e6b7e2d5 Author: Lasse Collin Date: 2014-01-12 12:17:08 +0200 xz: Fix use of wrong variable. Since the only call to suffix_set() uses optarg as the argument, fixing this bug doesn't change the behavior of the program. src/xz/suffix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 3e62c68d75b5a3fdd46dbb34bb335d73289860d5 Author: Lasse Collin Date: 2014-01-12 12:11:36 +0200 Fix typos in comments. src/common/mythread.h | 2 +- src/liblzma/check/crc32_fast.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit e90ea601fb72867ec04adf456cbe4bf9520fd412 Author: Lasse Collin Date: 2013-11-26 18:20:16 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit b22e94d8d15764416354e04729382a7371ae2c30 Author: Lasse Collin Date: 2013-11-26 18:20:09 +0200 liblzma: Document the need for block->check for lzma_block_header_decode(). Thanks to Tomer Chachamu. src/liblzma/api/lzma/block.h | 3 +++ 1 file changed, 3 insertions(+) commit d1cd8b1cb824b72421d1ee370e628024d2fcbec4 Author: Lasse Collin Date: 2013-11-12 16:38:57 +0200 xz: Update the man page about --block-size and --block-list. src/xz/xz.1 | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) commit 76be7c612e6bcc38724488ccc3b8bcb1cfec9f0a Author: Lasse Collin Date: 2013-11-12 16:30:53 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit dd750acbe2259d75444ef0f8da2d4bacc90d7afc Author: Lasse Collin Date: 2013-11-12 16:29:48 +0200 xz: Make --block-list and --block-size work together in single-threaded. Previously, --block-list and --block-size only worked together in threaded mode. Boundaries are specified by --block-list, but --block-size specifies the maximum size for a Block. Now this works in single-threaded mode too. Thanks to James M Leddy for the original patch. src/xz/coder.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 75 insertions(+), 15 deletions(-) commit ae222fe9805d0161d022d75ba8485dab8bf6d7d5 Author: Lasse Collin Date: 2013-10-26 13:26:14 +0300 Bump the version number to 5.1.3alpha. src/liblzma/api/lzma/version.h | 2 +- src/liblzma/liblzma.map | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 2193837a6a597cd3bf4e9ddf49421a5697d8e155 Author: Lasse Collin Date: 2013-10-26 13:25:02 +0300 Update NEWS for 5.1.3alpha. NEWS | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) commit ed48e75e2763876173aef8902da407a8eb28854b Author: Lasse Collin Date: 2013-10-26 12:47:04 +0300 Update TODO. TODO | 4 ---- 1 file changed, 4 deletions(-) commit 841da0352d79a56a44796a4c39163429c9f039a3 Author: Lasse Collin Date: 2013-10-25 22:41:28 +0300 xz: Document behavior of --block-list with threads. This needs to be updated before 5.2.0. src/xz/xz.1 | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) commit 56feb8665b78c1032aabd53c619c62af51defe64 Author: Lasse Collin Date: 2013-10-22 20:03:12 +0300 xz: Document --flush-timeout=TIMEOUT on the man page. src/xz/xz.1 | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) commit ba413da1d5bb3324287cf3174922acd921165971 Author: Lasse Collin Date: 2013-10-22 19:51:55 +0300 xz: Take advantage of LZMA_FULL_BARRIER with --block-list. Now if --block-list is used in threaded mode, the encoder won't need to flush at each Block boundary specified via --block-list. This improves performance a lot, making threading helpful with --block-list. The flush timer was reset after LZMA_FULL_FLUSH but since LZMA_FULL_BARRIER doesn't flush, resetting the timer is no longer done. src/xz/coder.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) commit 0cd45fc2bc5537de287a0bc005e2d67467a92148 Author: Lasse Collin Date: 2013-10-02 20:05:23 +0300 liblzma: Support LZMA_FULL_FLUSH and _BARRIER in threaded encoder. Now --block-list=SIZES works with in the threaded mode too, although the performance is still bad due to the use of LZMA_FULL_FLUSH instead of the new LZMA_FULL_BARRIER. src/liblzma/common/stream_encoder_mt.c | 55 ++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 16 deletions(-) commit 97bb38712f414fabecca908af2e38a12570293fd Author: Lasse Collin Date: 2013-10-02 12:55:11 +0300 liblzma: Add LZMA_FULL_BARRIER support to single-threaded encoder. In the single-threaded encoder LZMA_FULL_BARRIER is simply an alias for LZMA_FULL_FLUSH. src/liblzma/api/lzma/base.h | 37 ++++++++++++++++++++++++++++++------- src/liblzma/common/common.c | 17 +++++++++++++++-- src/liblzma/common/common.h | 7 ++++++- src/liblzma/common/stream_encoder.c | 4 +++- 4 files changed, 54 insertions(+), 11 deletions(-) commit fef0c6b410c08e581c9178700a4e7599f0895ff9 Author: Lasse Collin Date: 2013-09-17 11:57:51 +0300 liblzma: Add block_buffer_encoder.h into Makefile.inc. This should have been in b465da5988dd59ad98fda10c2e4ea13d0b9c73bc. src/liblzma/common/Makefile.inc | 1 + 1 file changed, 1 insertion(+) commit 8083e03291b6d21c0f538163e187b4e8cd5594e4 Author: Lasse Collin Date: 2013-09-17 11:55:38 +0300 xz: Add a missing test for TUKLIB_DOSLIKE. src/xz/file_io.c | 2 ++ 1 file changed, 2 insertions(+) commit 6b44b4a775fe29ecc7bcb7996e086e3bc09e5fd0 Author: Lasse Collin Date: 2013-09-17 11:52:28 +0300 Add native threading support on Windows. Now liblzma only uses "mythread" functions and types which are defined in mythread.h matching the desired threading method. Before Windows Vista, there is no direct equivalent to pthread condition variables. Since this package doesn't use pthread_cond_broadcast(), pre-Vista threading can still be kept quite simple. The pre-Vista code doesn't use anything that wasn't already available in Windows 95, so the binaries should run even on Windows 95 if someone happens to care. INSTALL | 41 ++- configure.ac | 118 ++++++-- src/common/mythread.h | 513 ++++++++++++++++++++++++++------- src/liblzma/common/stream_encoder_mt.c | 83 +++--- src/xz/coder.c | 8 +- windows/README-Windows.txt | 2 +- windows/build.bash | 23 +- 7 files changed, 573 insertions(+), 215 deletions(-) commit ae0ab74a88d5b9b15845f1d9a24ade4349a54f9f Author: Lasse Collin Date: 2013-09-11 14:40:35 +0300 Build: Remove a comment about Automake 1.10 from configure.ac. The previous commit supports silent rules and that requires Automake 1.11. configure.ac | 2 -- 1 file changed, 2 deletions(-) commit 72975df6c8c59aaf849138ab3606e8fb6970596a Author: Lasse Collin Date: 2013-09-09 20:37:03 +0300 Build: Create liblzma.pc in a src/liblzma/Makefile.am. Previously it was done in configure, but doing that goes against the Autoconf manual. Autoconf requires that it is possible to override e.g. prefix after running configure and that doesn't work correctly if liblzma.pc is created by configure. A potential downside of this change is that now e.g. libdir in liblzma.pc is a standalone string instead of being defined via ${prefix}, so if one overrides prefix when running pkg-config the libdir won't get the new value. I don't know if this matters in practice. Thanks to Vincent Torri. configure.ac | 1 - src/liblzma/Makefile.am | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) commit 1c2b6e7e8382ed390f53e140f160488bb2205ecc Author: Lasse Collin Date: 2013-08-04 15:24:09 +0300 Fix the previous commit which broke the build. Apparently I didn't even compile-test the previous commit. Thanks to Christian Hesse. src/common/tuklib_cpucores.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 124eb69c7857f618b4807588c51bc9ba21bf8691 Author: Lasse Collin Date: 2013-08-03 13:52:58 +0300 Windows: Add Windows support to tuklib_cpucores(). It is used for Cygwin too. I'm not sure if that is a good or bad idea. Thanks to Vincent Torri. m4/tuklib_cpucores.m4 | 19 +++++++++++++++++-- src/common/tuklib_cpucores.c | 13 ++++++++++++- 2 files changed, 29 insertions(+), 3 deletions(-) commit eada8a875ce3fd521cb42e4ace2624d3d49c5f35 Author: Anders F Bjorklund Date: 2013-08-02 15:59:46 +0200 macosx: separate liblzma package macosx/build.sh | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) commit be0100d01ca6a75899d051bee00acf17e6dc0c15 Author: Anders F Bjorklund Date: 2013-08-02 15:58:44 +0200 macosx: set minimum to leopard macosx/build.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) commit 416729e2d743f4b2fe9fd438eedeb98adce033c3 Author: Anders F Bjorklund Date: 2011-08-07 13:13:30 +0200 move configurables into variables macosx/build.sh | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) commit 16581080e5f29f9a4e49efece21c5bf572323acc Author: Lasse Collin Date: 2013-07-15 14:08:41 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 3e2b198ba37b624efd9c7caee2a435dc986b46c6 Author: Lasse Collin Date: 2013-07-15 14:08:02 +0300 Build: Fix the detection of missing CRC32. Thanks to Vincent Torri. configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit dee6ad3d5915422bc30a6821efeacaeb8ca8ef00 Author: Lasse Collin Date: 2013-07-04 14:18:46 +0300 xz: Add preliminary support for --flush-timeout=TIMEOUT. When --flush-timeout=TIMEOUT is used, xz will use LZMA_SYNC_FLUSH if read() would block and at least TIMEOUT milliseconds has elapsed since the previous flush. This can be useful in realtime-like use cases where the data is simultanously decompressed by another process (possibly on a different computer). If new uncompressed input data is produced slowly, without this option xz could buffer the data for a long time until it would become decompressible from the output. If TIMEOUT is 0, the feature is disabled. This is the default. This commit affects the compression side. Using xz for the decompression side for the above purpose doesn't work yet so well because there is quite a bit of input and output buffering when decompressing. The --long-help or man page were not updated yet. The details of this feature may change. src/xz/args.c | 7 +++++++ src/xz/coder.c | 46 +++++++++++++++++++++++++++++++++++----------- src/xz/file_io.c | 46 ++++++++++++++++++++++++++++++++++++---------- 3 files changed, 78 insertions(+), 21 deletions(-) commit fa381acaf9a29a8114e1c0a97de99bab9adb014e Author: Lasse Collin Date: 2013-07-04 13:41:03 +0300 xz: Don't set src_eof=true after an I/O error because it's useless. src/xz/file_io.c | 3 --- 1 file changed, 3 deletions(-) commit ea00545beace5b950f709ec21e46878e0f448678 Author: Lasse Collin Date: 2013-07-04 13:25:11 +0300 xz: Fix the test when to read more input. Testing for end of file was no longer correct after full flushing became possible with --block-size=SIZE and --block-list=SIZES. There was no bug in practice though because xz just made a few unneeded zero-byte reads. src/xz/coder.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit 736903c64bef394c06685d79908e397bcb08b88f Author: Lasse Collin Date: 2013-07-04 12:51:57 +0300 xz: Move some of the timing code into mytime.[hc]. This switches units from microseconds to milliseconds. New clock_gettime(CLOCK_MONOTONIC) will be used if available. There is still a fallback to gettimeofday(). src/xz/Makefile.am | 2 ++ src/xz/coder.c | 5 +++ src/xz/message.c | 54 +++++++++------------------------ src/xz/mytime.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/xz/mytime.h | 47 ++++++++++++++++++++++++++++ src/xz/private.h | 1 + 6 files changed, 158 insertions(+), 40 deletions(-) commit 24edf8d807e24ffaa1e793114d94cca3b970027d Author: Lasse Collin Date: 2013-07-01 14:35:03 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit c0627b3fceacfa1ed162f5f55235360ea26f569a Author: Lasse Collin Date: 2013-07-01 14:34:11 +0300 xz: Silence a warning seen with _FORTIFY_SOURCE=2. Thanks to Christian Hesse. src/xz/file_io.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) commit 1936718bb38ee394bd89836fdd4eabc0beb02443 Author: Lasse Collin Date: 2013-06-30 19:40:11 +0300 Update NEWS for 5.0.5. NEWS | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) commit a37ae8b5eb6093a530198f109c6f7a538c80ecf0 Author: Lasse Collin Date: 2013-06-30 18:02:27 +0300 Man pages: Use similar syntax for synopsis as in xz. The man pages of lzmainfo, xzmore, and xzdec had similar constructs as the man page of xz had before the commit eb6ca9854b8eb9fbf72497c1cf608d6b19d2d494. Eric S. Raymond didn't mention these man pages in his bug report, but it's nice to be consistent. src/lzmainfo/lzmainfo.1 | 4 ++-- src/scripts/xzmore.1 | 6 +++--- src/xzdec/xzdec.1 | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) commit cdba9ddd870ae72fd6219a125662c20ec997f86c Author: Lasse Collin Date: 2013-06-29 15:59:13 +0300 xz: Use non-blocking I/O for the output file. Now both reading and writing should be without race conditions with signals. They might still be signal handling issues left. Signals are blocked during many operations to avoid EINTR but it may cause problems e.g. if writing to stderr blocks when trying to display an error message. src/xz/file_io.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 8 deletions(-) commit e61a5c95da3fe31281d959e5e842885a8ba2b5bd Author: Lasse Collin Date: 2013-06-28 23:56:17 +0300 xz: Fix return value type in io_write_buf(). It didn't affect the behavior of the code since -1 becomes true anyway. src/xz/file_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 9dc319eabb34a826f4945f91c71620f14a60e9e2 Author: Lasse Collin Date: 2013-06-28 23:48:05 +0300 xz: Use the self-pipe trick to avoid a race condition with signals. It is possible that a signal to set user_abort arrives right before a blocking system call is made. In this case the call may block until another signal arrives, while the wanted behavior is to make xz clean up and exit as soon as possible. After this commit, the race condition is avoided with the input side which already uses non-blocking I/O. The output side still uses blocking I/O and thus has the race condition. src/xz/file_io.c | 56 ++++++++++++++++++++++++++++++++++++++++++++------------ src/xz/file_io.h | 8 ++++++++ src/xz/signals.c | 5 +++++ 3 files changed, 57 insertions(+), 12 deletions(-) commit 3541bc79d0cfabc0ad155c99bfdad1289f17fec3 Author: Lasse Collin Date: 2013-06-28 22:51:02 +0300 xz: Use non-blocking I/O for the input file. src/xz/file_io.c | 156 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 111 insertions(+), 45 deletions(-) commit 78673a08bed5066c81e8a8e90d20e670c28ecfd5 Author: Lasse Collin Date: 2013-06-28 18:46:13 +0300 xz: Remove an outdated NetBSD-specific comment. Nowadays errno == EFTYPE is documented in open(2). src/xz/file_io.c | 4 ---- 1 file changed, 4 deletions(-) commit a616fdad34b48b2932ef03fb87309dcc8b829527 Author: Lasse Collin Date: 2013-06-28 18:09:47 +0300 xz: Fix error detection of fcntl(fd, F_SETFL, flags) calls. POSIX says that fcntl(fd, F_SETFL, flags) returns -1 on error and "other than -1" on success. This is how it is documented e.g. on OpenBSD too. On Linux, success with F_SETFL is always 0 (at least accorinding to fcntl(2) from man-pages 3.51). src/xz/file_io.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) commit 4a08a6e4c61c65ab763ab314100a6d7a3bb89298 Author: Lasse Collin Date: 2013-06-28 17:36:47 +0300 xz: Fix use of wrong variable in a fcntl() call. Due to a wrong variable name, when writing a sparse file to standard output, *all* file status flags were cleared (to the extent the operating system allowed it) instead of only clearing the O_APPEND flag. In practice this worked fine in the common situations on GNU/Linux, but I didn't check how it behaved elsewhere. The original flags were still restored correctly. I still changed the code to use a separate boolean variable to indicate when the flags should be restored instead of relying on a special value in stdout_flags. src/xz/file_io.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) commit b790b435daa3351067f80a5973b647f8d55367a2 Author: Lasse Collin Date: 2013-06-28 14:55:37 +0300 xz: Fix assertion related to posix_fadvise(). Input file can be a FIFO or something else that doesn't support posix_fadvise() so don't check the return value even with an assertion. Nothing bad happens if the call to posix_fadvise() fails. src/xz/file_io.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) commit 84d2da6c9dc252f441deb7626c2522202b005d4d Author: Lasse Collin Date: 2013-06-26 13:30:57 +0300 xz: Check the value of lzma_stream_flags.version in --list. It is a no-op for now, but if an old xz version is used together with a newer liblzma that supports something new, then this check becomes important and will stop the old xz from trying to parse files that it won't understand. src/xz/list.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) commit 9376f5f8f762296f2173d61af9101112c36f38c0 Author: Lasse Collin Date: 2013-06-26 12:17:00 +0300 Build: Require Automake 1.12 and use serial-tests option. It should actually still work with Automake 1.10 if the serial-tests option is removed. Automake 1.13 started using parallel tests by default and the option to get the old behavior isn't supported before 1.12. At least for now, parallel tests don't improve anything in XZ Utils but they hide the progress output from test_compress.sh. configure.ac | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) commit b7e200d7bd0a3c7c171c13ad37d68296d6f73374 Author: Lasse Collin Date: 2013-06-23 18:59:13 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 46540e4c10923e363741ff5aab99e79fc0ce6ee8 Author: Lasse Collin Date: 2013-06-23 18:57:23 +0300 liblzma: Avoid a warning about a shadowed variable. On Mac OS X wait() is declared in that we include one way or other so don't use "wait" as a variable name. Thanks to Christian Kujau. src/liblzma/common/stream_encoder_mt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit ebb501ec73cecc546c67117dd01b5e33c90bfb4a Author: Lasse Collin Date: 2013-06-23 17:36:47 +0300 xz: Validate Uncompressed Size from Block Header in list.c. This affects only "xz -lvv". Normal decompression with xz already detected if Block Header and Index had mismatched Uncompressed Size fields. So this just makes "xz -lvv" show such files as corrupt instead of showing the Uncompressed Size from Index. src/xz/list.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) commit c09e91dd236d3cabee0fc48312b3dc8cceae41ab Author: Lasse Collin Date: 2013-06-21 22:08:11 +0300 Update THANKS. THANKS | 2 ++ 1 file changed, 2 insertions(+) commit eb6ca9854b8eb9fbf72497c1cf608d6b19d2d494 Author: Lasse Collin Date: 2013-06-21 22:04:45 +0300 xz: Make the man page more friendly to doclifter. Thanks to Eric S. Raymond. src/xz/xz.1 | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) commit 0c0a1947e6ad90a0a10b7a5c39f6ab99a0aa5c93 Author: Lasse Collin Date: 2013-06-21 21:54:59 +0300 xz: A couple of man page fixes. Now the interaction of presets and custom filter chains is described correctly. Earlier it contradicted itself. Thanks to DevHC who reported these issues on IRC to me on 2012-12-14. src/xz/xz.1 | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) commit 2fcda89939c903106c429e109083d43d894049e0 Author: Lasse Collin Date: 2013-06-21 21:50:26 +0300 xz: Fix interaction between preset and custom filter chains. There was somewhat illogical behavior when --extreme was specified and mixed with custom filter chains. Before this commit, "xz -9 --lzma2 -e" was equivalent to "xz --lzma2". After it is equivalent to "xz -6e" (all earlier preset options get forgotten when a custom filter chain is specified and the default preset is 6 to which -e is applied). I find this less illogical. This also affects the meaning of "xz -9e --lzma2 -7". Earlier it was equivalent to "xz -7e" (the -e specified before a custom filter chain wasn't forgotten). Now it is "xz -7". Note that "xz -7e" still is the same as "xz -e7". Hopefully very few cared about this in the first place, so pretty much no one should even notice this change. Thanks to Conley Moorhous. src/xz/coder.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) commit 97379c5ea758da3f8b0bc444d5f7fa43753ce610 Author: Lasse Collin Date: 2013-04-27 22:07:46 +0300 Build: Use -Wvla with GCC if supported. Variable-length arrays are mandatory in C99 but optional in C11. The code doesn't currently use any VLAs and it shouldn't in the future either to stay compatible with C11 without requiring any optional C11 features. configure.ac | 1 + 1 file changed, 1 insertion(+) commit 8957c58609d3987c58aa72b96c436cf565cc4917 Author: Lasse Collin Date: 2013-04-15 19:29:09 +0300 xzdec: Improve the --help message. The options are now ordered in the same order as in xz's help message. Descriptions were added to the options that are ignored. I left them in parenthesis even if it looks a bit weird because I find it easier to spot the ignored vs. non-ignored options from the list that way. src/xzdec/xzdec.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) commit ed886e1a92534a24401d0e99c11f1dcff3b5220a Author: Lasse Collin Date: 2013-04-05 19:25:40 +0300 Update THANKS. THANKS | 2 ++ 1 file changed, 2 insertions(+) commit 5019413a055ce29e660dbbf15e02443cb5a26c59 Author: Jeff Bastian Date: 2013-04-03 13:59:17 +0200 xzgrep: make the '-h' option to be --no-filename equivalent * src/scripts/xzgrep.in: Accept the '-h' option in argument parsing. src/scripts/xzgrep.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 5ea900cb5ad862bca81316729f92357c1fc040ce Author: Lasse Collin Date: 2013-03-23 22:25:15 +0200 liblzma: Be less picky in lzma_alone_decoder(). To avoid false positives when detecting .lzma files, rare values in dictionary size and uncompressed size fields were rejected. They will still be rejected if .lzma files are decoded with lzma_auto_decoder(), but when using lzma_alone_decoder() directly, such files will now be accepted. Hopefully this is an OK compromise. This doesn't affect xz because xz still has its own file format detection code. This does affect lzmadec though. So after this commit lzmadec will accept files that xz or xz-emulating-lzma doesn't. NOTE: lzma_alone_decoder() still won't decode all .lzma files because liblzma's LZMA decoder doesn't support lc + lp > 4. Reported here: http://sourceforge.net/projects/lzmautils/forums/forum/708858/topic/7068827 src/liblzma/common/alone_decoder.c | 22 ++++++++++++++-------- src/liblzma/common/alone_decoder.h | 5 +++-- src/liblzma/common/auto_decoder.c | 2 +- 3 files changed, 18 insertions(+), 11 deletions(-) commit bb117fffa84604b6e3811b068c80db82bf7f7b05 Author: Lasse Collin Date: 2013-03-23 21:55:13 +0200 liblzma: Use lzma_block_buffer_bound64() in threaded encoder. Now it uses lzma_block_uncomp_encode() if the data doesn't fit into the space calculated by lzma_block_buffer_bound64(). src/liblzma/common/stream_encoder_mt.c | 66 +++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 16 deletions(-) commit e572e123b55b29527e54ce5f0807f115481d78b9 Author: Lasse Collin Date: 2013-03-23 21:51:38 +0200 liblzma: Fix another deadlock in the threaded encoder. This race condition could cause a deadlock if lzma_end() was called before finishing the encoding. This can happen with xz with debugging enabled (non-debugging version doesn't call lzma_end() before exiting). src/liblzma/common/stream_encoder_mt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) commit b465da5988dd59ad98fda10c2e4ea13d0b9c73bc Author: Lasse Collin Date: 2013-03-23 19:17:33 +0200 liblzma: Add lzma_block_uncomp_encode(). This also adds a new internal function lzma_block_buffer_bound64() which is similar to lzma_block_buffer_bound() but uses uint64_t instead of size_t. src/liblzma/api/lzma/block.h | 18 ++++++ src/liblzma/common/block_buffer_encoder.c | 94 +++++++++++++++++++++---------- src/liblzma/common/block_buffer_encoder.h | 24 ++++++++ src/liblzma/liblzma.map | 1 + 4 files changed, 106 insertions(+), 31 deletions(-) commit 9e6dabcf22ef4679f4faaae15ebd5b137ae2fad1 Author: Lasse Collin Date: 2013-03-05 19:14:50 +0200 Avoid unneeded use of awk in xzless. Use "read" instead of "awk" in xzless to get the version number of "less". The need for awk was introduced in the commit db5c1817fabf7cbb9e4087b1576eb26f0747338e. Thanks to Ariel P for the patch. src/scripts/xzless.in | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) commit e7b424d267a34803db8d92a3515528be2ed45abd Author: Lasse Collin Date: 2012-12-14 20:13:32 +0200 Make the progress indicator smooth in threaded mode. This adds lzma_get_progress() to liblzma and takes advantage of it in xz. lzma_get_progress() collects progress information from the thread-specific structures so that fairly accurate progress information is available to applications. Adding a new function seemed to be a better way than making the information directly available in lzma_stream (like total_in and total_out are) because collecting the information requires locking mutexes. It's waste of time to do it more often than the up to date information is actually needed by an application. src/liblzma/api/lzma/base.h | 22 +++++++++- src/liblzma/common/common.c | 16 +++++++ src/liblzma/common/common.h | 6 +++ src/liblzma/common/stream_encoder_mt.c | 77 +++++++++++++++++++++++++++++++--- src/liblzma/liblzma.map | 1 + src/xz/message.c | 20 +++++---- 6 files changed, 129 insertions(+), 13 deletions(-) commit 2ebbb994e367f55f2561aa7c9e7451703c171f2f Author: Lasse Collin Date: 2012-12-14 11:01:41 +0200 liblzma: Fix mythread_sync for nested locking. src/common/mythread.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) commit 4c7e28705f6de418d19cc77324ef301f996e01ff Author: Lasse Collin Date: 2012-12-13 21:05:36 +0200 xz: Mention --threads in --help. Thanks to Olivier Delhomme for pointing out that this was still missing. src/xz/message.c | 4 ++++ 1 file changed, 4 insertions(+) commit db5c1817fabf7cbb9e4087b1576eb26f0747338e Author: Jonathan Nieder Date: 2012-11-19 00:10:10 -0800 xzless: Make "less -V" parsing more robust In v4.999.9beta~30 (xzless: Support compressed standard input, 2009-08-09), xzless learned to parse ‘less -V’ output to figure out whether less is new enough to handle $LESSOPEN settings starting with “|-”. That worked well for a while, but the version string from ‘less’ versions 448 (June, 2012) is misparsed, producing a warning: $ xzless /tmp/test.xz; echo $? /usr/bin/xzless: line 49: test: 456 (GNU regular expressions): \ integer expression expected 0 More precisely, modern ‘less’ lists the regexp implementation along with its version number, and xzless passes the entire version number with attached parenthetical phrase as a number to "test $a -gt $b", producing the above confusing message. $ less-444 -V | head -1 less 444 $ less -V | head -1 less 456 (no regular expressions) So relax the pattern matched --- instead of expecting "less ", look for a line of the form "less [ (extra parenthetical)]". While at it, improve the behavior when no matching line is found --- instead of producing a cryptic message, we can fall back on a LESSPIPE setting that is supported by all versions of ‘less’. The implementation uses "awk" for simplicity. Hopefully that’s portable enough. Reported-by: Jörg-Volker Peetz Signed-off-by: Jonathan Nieder src/scripts/xzless.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 65536214a31ecd33b6b03b68a351fb597d3703d6 Author: Lasse Collin Date: 2012-10-03 15:54:24 +0300 xz: Fix the note about --rsyncable on the man page. src/xz/xz.1 | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) commit 3d93b6354927247a1569caf22ad27b07e97ee904 Author: Lasse Collin Date: 2012-09-28 20:11:09 +0300 xz: Improve handling of failed realloc in xrealloc. Thanks to Jim Meyering. src/xz/util.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) commit ab225620664e235637833be2329935f9d290ba80 Author: Lasse Collin Date: 2012-08-24 16:27:31 +0300 A few typo fixes to comments and the xz man page. Thanks to Jim Meyering. configure.ac | 2 +- src/liblzma/check/sha256.c | 1 - src/xz/xz.1 | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) commit f3c1ec69d910175ffd431fd82968dd35cec806ed Author: Lasse Collin Date: 2012-08-13 21:40:09 +0300 xz: Add a warning to --help about alpha and beta versions. src/xz/message.c | 5 +++++ 1 file changed, 5 insertions(+) commit d8eaf9d8278c23c2cf2b7ca5562d4de570d3b5db Author: Lasse Collin Date: 2012-08-02 17:13:30 +0300 Build: Bump gettext version requirement to 0.18. Otherwise too old version of m4/lib-link.m4 gets included when autoreconf -fi is run. configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 96e08902b09f0f304d4ff80c6e83ef7fff883f34 Author: Lasse Collin Date: 2012-07-17 18:29:08 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 3778db1be53e61ff285c573af5ee468803008456 Author: Lasse Collin Date: 2012-07-17 18:19:59 +0300 liblzma: Make the use of lzma_allocator const-correct. There is a tiny risk of causing breakage: If an application assigns lzma_stream.allocator to a non-const pointer, such code won't compile anymore. I don't know why anyone would do such a thing though, so in practice this shouldn't cause trouble. Thanks to Jan Kratochvil for the patch. src/liblzma/api/lzma/base.h | 4 +++- src/liblzma/api/lzma/block.h | 6 ++--- src/liblzma/api/lzma/container.h | 9 +++++--- src/liblzma/api/lzma/filter.h | 13 ++++++----- src/liblzma/api/lzma/index.h | 16 ++++++------- src/liblzma/api/lzma/index_hash.h | 4 ++-- src/liblzma/common/alone_decoder.c | 6 ++--- src/liblzma/common/alone_decoder.h | 2 +- src/liblzma/common/alone_encoder.c | 8 +++---- src/liblzma/common/auto_decoder.c | 6 ++--- src/liblzma/common/block_buffer_decoder.c | 2 +- src/liblzma/common/block_buffer_encoder.c | 4 ++-- src/liblzma/common/block_decoder.c | 6 ++--- src/liblzma/common/block_decoder.h | 2 +- src/liblzma/common/block_encoder.c | 8 +++---- src/liblzma/common/block_encoder.h | 2 +- src/liblzma/common/block_header_decoder.c | 4 ++-- src/liblzma/common/common.c | 10 ++++----- src/liblzma/common/common.h | 20 +++++++++-------- src/liblzma/common/easy_buffer_encoder.c | 4 ++-- src/liblzma/common/filter_buffer_decoder.c | 3 ++- src/liblzma/common/filter_buffer_encoder.c | 7 +++--- src/liblzma/common/filter_common.c | 4 ++-- src/liblzma/common/filter_common.h | 2 +- src/liblzma/common/filter_decoder.c | 7 +++--- src/liblzma/common/filter_decoder.h | 2 +- src/liblzma/common/filter_encoder.c | 2 +- src/liblzma/common/filter_encoder.h | 2 +- src/liblzma/common/filter_flags_decoder.c | 2 +- src/liblzma/common/index.c | 26 ++++++++++----------- src/liblzma/common/index_decoder.c | 12 +++++----- src/liblzma/common/index_encoder.c | 6 ++--- src/liblzma/common/index_encoder.h | 2 +- src/liblzma/common/index_hash.c | 6 +++-- src/liblzma/common/outqueue.c | 4 ++-- src/liblzma/common/outqueue.h | 5 +++-- src/liblzma/common/stream_buffer_decoder.c | 2 +- src/liblzma/common/stream_buffer_encoder.c | 3 ++- src/liblzma/common/stream_decoder.c | 9 ++++---- src/liblzma/common/stream_decoder.h | 5 +++-- src/liblzma/common/stream_encoder.c | 10 ++++----- src/liblzma/common/stream_encoder_mt.c | 16 ++++++------- src/liblzma/delta/delta_common.c | 4 ++-- src/liblzma/delta/delta_decoder.c | 6 ++--- src/liblzma/delta/delta_decoder.h | 5 +++-- src/liblzma/delta/delta_encoder.c | 6 ++--- src/liblzma/delta/delta_encoder.h | 3 ++- src/liblzma/delta/delta_private.h | 2 +- src/liblzma/lz/lz_decoder.c | 8 +++---- src/liblzma/lz/lz_decoder.h | 7 +++--- src/liblzma/lz/lz_encoder.c | 19 ++++++++-------- src/liblzma/lz/lz_encoder.h | 6 ++--- src/liblzma/lzma/lzma2_decoder.c | 8 +++---- src/liblzma/lzma/lzma2_decoder.h | 5 +++-- src/liblzma/lzma/lzma2_encoder.c | 6 ++--- src/liblzma/lzma/lzma2_encoder.h | 2 +- src/liblzma/lzma/lzma_decoder.c | 8 +++---- src/liblzma/lzma/lzma_decoder.h | 7 +++--- src/liblzma/lzma/lzma_encoder.c | 7 +++--- src/liblzma/lzma/lzma_encoder.h | 5 +++-- src/liblzma/simple/arm.c | 8 ++++--- src/liblzma/simple/armthumb.c | 8 ++++--- src/liblzma/simple/ia64.c | 8 ++++--- src/liblzma/simple/powerpc.c | 8 ++++--- src/liblzma/simple/simple_coder.c | 10 ++++----- src/liblzma/simple/simple_coder.h | 36 ++++++++++++++++++++---------- src/liblzma/simple/simple_decoder.c | 2 +- src/liblzma/simple/simple_decoder.h | 2 +- src/liblzma/simple/simple_private.h | 3 ++- src/liblzma/simple/sparc.c | 8 ++++--- src/liblzma/simple/x86.c | 8 ++++--- 71 files changed, 269 insertions(+), 219 deletions(-) commit d625c7cf824fd3b61c6da84f56179e94917ff603 Author: Lasse Collin Date: 2012-07-05 07:36:28 +0300 Tests: Remove tests/test_block.c that had gotten committed accidentally. tests/test_block.c | 52 ---------------------------------------------------- 1 file changed, 52 deletions(-) commit 0b09d266cce72bc4841933b171e79551e488927c Author: Lasse Collin Date: 2012-07-05 07:33:35 +0300 Build: Include macosx/build.sh in the distribution. It has been in the Git repository since 2010 but probably few people have seen it since it hasn't been included in the release tarballs. :-( Makefile.am | 1 + 1 file changed, 1 insertion(+) commit d6e0b23d4613b9f417893dd96cc168c8005ece3d Author: Lasse Collin Date: 2012-07-05 07:28:53 +0300 Build: Include validate_map.sh in the distribution. It's required by "make mydist". Fix also the location of EXTRA_DIST+= so that those files get distributed also if symbol versioning isn't enabled. src/liblzma/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 19de545d86097c3954d69ab5d12820387f6a09bc Author: Lasse Collin Date: 2012-07-05 07:24:45 +0300 Docs: Fix the name LZMA Utils -> XZ Utils in debug/README. debug/README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 672eccf57c31a40dfb956b7662db06d43e18618e Author: Lasse Collin Date: 2012-07-05 07:23:17 +0300 Include debug/translation.bash in the distribution. Also fix the script name mentioned in README. README | 4 ++-- debug/Makefile.am | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) commit cafb523adac1caf305e70a04bc37f25602bf990c Author: Lasse Collin Date: 2012-07-04 22:31:58 +0300 xz: Document --block-list better. Thanks to Jonathan Nieder. src/xz/xz.1 | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) commit c7ff218528bc8f7c65e7ef73c6515777346c6794 Author: Lasse Collin Date: 2012-07-04 20:01:49 +0300 Bump the version number to 5.1.2alpha. src/liblzma/api/lzma/version.h | 2 +- src/liblzma/liblzma.map | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 8f3c1d886f93e6478ad509ff52102b2ce7faa999 Author: Lasse Collin Date: 2012-07-04 20:01:19 +0300 Update NEWS for 5.1.2alpha. NEWS | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) commit 0d5fa05466e580fbc458820f87013ae7644e20e5 Author: Lasse Collin Date: 2012-07-04 19:58:23 +0300 xz: Fix the version number printed by xz -lvv. The decoder bug was fixed in 5.0.2 instead of 5.0.3. src/xz/list.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit df11317985a4165731dde12bb0f0028da0e7b77f Author: Lasse Collin Date: 2012-07-04 17:11:31 +0300 Build: Add a comment to configure.ac about symbol versioning. configure.ac | 4 ++++ 1 file changed, 4 insertions(+) commit bd9cc179e8be3ef515201d3ed9c7dd79ae88869d Author: Lasse Collin Date: 2012-07-04 17:06:49 +0300 Update TODO. TODO | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) commit 4a238dd9b22f462cac5e199828bf1beb0df05884 Author: Lasse Collin Date: 2012-07-04 17:05:46 +0300 Document --enable-symbol-versions in INSTALL. INSTALL | 5 +++++ 1 file changed, 5 insertions(+) commit 88ccf47205d7f3aa314d358c72ef214f10f68b43 Author: Lasse Collin Date: 2012-07-03 21:16:39 +0300 xz: Add incomplete support for --block-list. It's broken with threads and when also --block-size is used. src/xz/args.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/xz/args.h | 1 + src/xz/coder.c | 48 ++++++++++++++++++++++++++++------ src/xz/coder.h | 4 +++ src/xz/main.c | 1 + src/xz/message.c | 6 +++++ src/xz/xz.1 | 23 +++++++++++++++-- 7 files changed, 151 insertions(+), 10 deletions(-) commit 972179cdcdf5d8949c48ee31737d87d3050b44af Author: Lasse Collin Date: 2012-07-01 18:44:33 +0300 xz: Update the man page about the new field in --robot -lvv. src/xz/xz.1 | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) commit 1403707fc64a70976aebe66f8d9a9bd12f73a2c5 Author: Lasse Collin Date: 2012-06-28 10:47:49 +0300 liblzma: Check that the first byte of range encoded data is 0x00. It is just to be more pedantic and thus perhaps catch broken files slightly earlier. src/liblzma/lzma/lzma_decoder.c | 8 ++++++-- src/liblzma/rangecoder/range_decoder.h | 12 +++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) commit eccd8017ffe2c5de473222c4963ec53c62f7fda2 Author: Lasse Collin Date: 2012-06-22 19:00:23 +0300 Update NEWS from 5.0.4. NEWS | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) commit 2e6754eac26a431e8d340c28906f63bcd1e177e8 Author: Lasse Collin Date: 2012-06-22 14:34:03 +0300 xz: Update man page date to match the latest update. src/xz/xz.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit b3235a0b1af45d5e1244cbe3191516966c076fa0 Author: Lasse Collin Date: 2012-06-18 21:27:47 +0300 Docs: Language fix to 01_compress_easy.c. Thanks to Jonathan Nieder. doc/examples/01_compress_easy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit f1675f765fe228cb5a5f904f853445a03e33cfe9 Author: Lasse Collin Date: 2012-06-14 20:15:30 +0300 Fix the top-level Makefile.am for the new example programs. Makefile.am | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) commit 3a0c5378abefaf86aa39a62a7c9682bdb21568a1 Author: Lasse Collin Date: 2012-06-14 10:52:33 +0300 Docs: Add new example programs. These have more comments than the old examples and human-readable error messages. More tutorial-like examples are needed but these are a start. doc/examples/00_README.txt | 27 ++++ doc/examples/01_compress_easy.c | 297 ++++++++++++++++++++++++++++++++++++++ doc/examples/02_decompress.c | 287 ++++++++++++++++++++++++++++++++++++ doc/examples/03_compress_custom.c | 193 +++++++++++++++++++++++++ doc/examples/Makefile | 23 +++ 5 files changed, 827 insertions(+) commit 1bd2c2c553e30c4a73cfb82abc6908efd6be6b8d Author: Lasse Collin Date: 2012-06-14 10:33:27 +0300 Docs: Move xz_pipe_comp.c and xz_pipe_decomp.c to doc/examples_old. It is good to keep these around to so that if someone has copied the decompressor bug from xz_pipe_decomp.c he has an example how to easily fix it. doc/{examples => examples_old}/xz_pipe_comp.c | 0 doc/{examples => examples_old}/xz_pipe_decomp.c | 0 2 files changed, 0 insertions(+), 0 deletions(-) commit 905f0ab5b5ce544d4b68a2ed6077df0f3d021292 Author: Lasse Collin Date: 2012-06-14 10:33:01 +0300 Docs: Fix a bug in xz_pipe_decomp.c example program. doc/examples/xz_pipe_decomp.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) commit 4bd1a3bd5fdf4870b2f96dd0b8a21657c8a58ad8 Author: Lasse Collin Date: 2012-05-30 23:14:33 +0300 Translations: Update the French translation. Thanks to Adrien Nader. po/fr.po | 148 ++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 79 insertions(+), 69 deletions(-) commit d2e836f2f3a87df6fe6bb0589b037db51205d910 Author: Lasse Collin Date: 2012-05-29 23:42:37 +0300 Translations: Update the German translation. The previous only included the new strings in v5.0. po/de.po | 229 +++++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 133 insertions(+), 96 deletions(-) commit c9a16151577ba459afd6e3528df23bc0ddb95171 Author: Lasse Collin Date: 2012-05-29 22:26:27 +0300 Translations: Update the German translation. po/de.po | 169 ++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 91 insertions(+), 78 deletions(-) commit 1530a74fd48f8493372edad137a24541efe24713 Author: Lasse Collin Date: 2012-05-29 22:14:21 +0300 Translations: Update Polish translation. po/pl.po | 283 +++++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 165 insertions(+), 118 deletions(-) commit d8db706acb8316f9861abd432cfbe001dd6d0c5c Author: Lasse Collin Date: 2012-05-28 20:42:11 +0300 liblzma: Fix possibility of incorrect LZMA_BUF_ERROR. lzma_code() could incorrectly return LZMA_BUF_ERROR if all of the following was true: - The caller knows how many bytes of output to expect and only provides that much output space. - When the last output bytes are decoded, the caller-provided input buffer ends right before the LZMA2 end of payload marker. So LZMA2 won't provide more output anymore, but it won't know it yet and thus won't return LZMA_STREAM_END yet. - A BCJ filter is in use and it hasn't left any unfiltered bytes in the temp buffer. This can happen with any BCJ filter, but in practice it's more likely with filters other than the x86 BCJ. Another situation where the bug can be triggered happens if the uncompressed size is zero bytes and no output space is provided. In this case the decompression can fail even if the whole input file is given to lzma_code(). A similar bug was fixed in XZ Embedded on 2011-09-19. src/liblzma/simple/simple_coder.c | 2 +- tests/Makefile.am | 4 +- tests/test_bcj_exact_size.c | 112 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 2 deletions(-) commit 3f94b6d87f1b8f1c421ba548f8ebb83dca9c8cda Author: Lasse Collin Date: 2012-05-28 15:38:32 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 7769ea051d739a38a1640fd448cf5eb83cb119c6 Author: Lasse Collin Date: 2012-05-28 15:37:43 +0300 xz: Don't show a huge number in -vv when memory limit is disabled. src/xz/message.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) commit ec921105725e4d3ef0a683dd83eee6f24ab60ccd Author: Lasse Collin Date: 2012-05-27 22:30:17 +0300 xz: Document the "summary" lines of --robot -lvv. This documents only the columns that are in v5.0. The new columns added in the master branch aren't necessarily stable yet. src/xz/xz.1 | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) commit 27d24eb0a9f6eed96d6a4594c2b0bf7a91d29f9a Author: Lasse Collin Date: 2012-05-27 21:53:20 +0300 xz: Fix output of verbose --robot --list modes. It printed the filename in "filename (x/y)" format which it obviously shouldn't do in robot mode. src/xz/message.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit ab25b82a91754d9388c89abddf806424671d9431 Author: Lasse Collin Date: 2012-05-24 18:33:54 +0300 Build: Upgrade m4/acx_pthread.m4 to the latest version. m4/ax_pthread.m4 | 98 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 36 deletions(-) commit d05d6d65c41a4bc83f162fa3d67c5d84e8751634 Author: Lasse Collin Date: 2012-05-10 21:15:17 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit e077391982f9f28dbfe542bba8800e7c5b916666 Author: Lasse Collin Date: 2012-05-10 21:14:16 +0300 Docs: Cleanup line wrapping a bit. README | 12 ++++++------ doc/history.txt | 49 +++++++++++++++++++++++++------------------------ 2 files changed, 31 insertions(+), 30 deletions(-) commit fc39849c350225c6a1cd7f6e6adff1020521eabc Author: Benno Schulenberg Date: 2012-03-13 22:04:04 +0100 Fix a few typos and add some missing articles in some documents. Also hyphenate several compound adjectives. Signed-off-by: Benno Schulenberg AUTHORS | 6 +++--- README | 42 ++++++++++++++++++++--------------------- doc/faq.txt | 24 ++++++++++++------------ doc/history.txt | 58 ++++++++++++++++++++++++++++----------------------------- 4 files changed, 65 insertions(+), 65 deletions(-) commit 29fa0566d5df199cb9acb2d17bf7eea61acc7fa1 Author: Lasse Collin Date: 2012-04-29 11:51:25 +0300 Windows: Update notes about static linking with MSVC. windows/README-Windows.txt | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) commit aac1b31ea4e66cf5a7a8c116bdaa15aa45e6c56e Author: Lasse Collin Date: 2012-04-19 15:25:26 +0300 liblzma: Remove outdated comments. src/liblzma/simple/simple_coder.c | 3 --- src/liblzma/simple/simple_private.h | 3 +-- 2 files changed, 1 insertion(+), 5 deletions(-) commit df14a46013bea70c0bd35be7821b0b9108f97de7 Author: Lasse Collin Date: 2012-04-19 14:17:52 +0300 DOS: Link against DJGPP's libemu to support FPU emulation. This way xz should work on 386SX and 486SX. Floating point only is needed for verbose output in xz. dos/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 03ed742a3a4931bb5c821357832083b26f577b13 Author: Lasse Collin Date: 2012-04-19 14:02:25 +0300 liblzma: Fix Libs.private in liblzma.pc to include -lrt when needed. src/liblzma/liblzma.pc.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 8c5b13ad59df70f49429bfdfd6ac120b8f892fda Author: Lasse Collin Date: 2012-04-19 13:58:55 +0300 Docs: Update MINIX 3 information in INSTALL. INSTALL | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) commit c7376fc415a1566f38b2de4b516a17013d516a8b Author: Lasse Collin Date: 2012-02-22 14:23:13 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit cff070aba6281ba743d29a62b8c0c66e5da4b2a6 Author: Lasse Collin Date: 2012-02-22 14:02:34 +0200 Fix exit status of xzgrep when grepping binary files. When grepping binary files, grep may exit before it has read all the input. In this case, gzip -q returns 2 (eating SIGPIPE), but xz and bzip2 show SIGPIPE as the exit status (e.g. 141). This causes wrong exit status when grepping xz- or bzip2-compressed binary files. The fix checks for the special exit status that indicates SIGPIPE. It uses kill -l which should be supported everywhere since it is in both SUSv2 (1997) and POSIX.1-2008. Thanks to James Buren for the bug report. src/scripts/xzgrep.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 41cafb2bf9beea915710ee68f05fe929cd17759c Author: Lasse Collin Date: 2012-02-22 12:08:43 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 2dcea03712fa881930d69ec9eff70855c3d126d9 Author: Lasse Collin Date: 2012-02-22 12:00:16 +0200 Fix compiling with IBM XL C on AIX. INSTALL | 36 ++++++++++++++++++++++-------------- configure.ac | 6 +++++- 2 files changed, 27 insertions(+), 15 deletions(-) commit 7db6bdf4abcf524115be2cf5659ed540cef074c5 Author: Lasse Collin Date: 2012-01-10 17:13:03 +0200 Tests: Fix a compiler warning with _FORTIFY_SOURCE. Reported here: http://sourceforge.net/projects/lzmautils/forums/forum/708858/topic/4927385 tests/create_compress_files.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) commit 694952d545b6cf056547893ced69486eff9ece55 Author: Lasse Collin Date: 2011-12-19 21:21:29 +0200 Docs: Explain the stable releases better in README. README | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit 418fe668b3c53a9a20020b6cc652aaf25c734b29 Author: Lasse Collin Date: 2011-11-07 13:07:52 +0200 xz: Show minimum required XZ Utils version in xz -lvv. Man page wasn't updated yet. src/xz/list.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 6 deletions(-) commit 7081d82c37326bac97184e338345fa1c327e3580 Author: Lasse Collin Date: 2011-11-04 17:57:16 +0200 xz: Fix a typo in a comment. Thanks to Bela Lubkin. src/xz/args.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 232fe7cd70ad258d6a37f17e860e0f1b1891eeb5 Author: Lasse Collin Date: 2011-11-03 17:08:02 +0200 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 74d2bae4d3449c68453b0473dd3430ce91fd90c1 Author: Lasse Collin Date: 2011-11-03 17:07:22 +0200 xz: Fix xz on EBCDIC systems. Thanks to Chris Donawa. src/xz/coder.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) commit 4ac4923f47cc0ef97dd9ca5cfcc44fc53eeab34a Author: Lasse Collin Date: 2011-10-23 17:09:10 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit ab50ae3ef40c81e5bf613905ca3fd636548b75e7 Author: Lasse Collin Date: 2011-10-23 17:08:14 +0300 liblzma: Fix invalid free() in the threaded encoder. It was triggered if initialization failed e.g. due to running out of memory. Thanks to Arkadiusz Miskiewicz. src/liblzma/common/outqueue.c | 4 ++++ 1 file changed, 4 insertions(+) commit 6b620a0f0813d28c3c544b4ff8cb595b38a6e908 Author: Lasse Collin Date: 2011-10-23 17:05:55 +0300 liblzma: Fix a deadlock in the threaded encoder. It was triggered when reinitializing the encoder, e.g. when encoding two files. src/liblzma/common/stream_encoder_mt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) commit bd52cf150ecd51e3ab63a9cc1a3cff6a77500178 Author: Lasse Collin Date: 2011-09-06 12:03:41 +0300 Build: Fix "make check" on Windows. tests/Makefile.am | 7 +++++-- windows/build.bash | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) commit 5c5b2256969ac473001b7d67615ed3bd0a54cc82 Author: Lasse Collin Date: 2011-08-09 21:19:13 +0300 Update THANKS. THANKS | 2 ++ 1 file changed, 2 insertions(+) commit 5b1e1f10741af9e4bbe4cfc3261fb7c7b04f7809 Author: Lasse Collin Date: 2011-08-09 21:16:44 +0300 Workaround unusual SIZE_MAX on SCO OpenServer. src/common/sysdefs.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) commit e9ed88126eee86e2511fa42681a5c7104820cf0a Author: Lasse Collin Date: 2011-08-06 20:37:28 +0300 Run the scripts with the correct shell in test_scripts.sh. The scripts are now made executable in the build tree. This way the scripts can be run like programs in test_scripts.sh. Previously test_scripts.sh always used sh but it's not correct if @POSIX_SHELL@ is set to something else by configure. Thanks to Jonathan Nieder for the patch. configure.ac | 8 ++++---- tests/test_scripts.sh | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) commit 1c673e5681720491a74fc4b2992e075f47302c22 Author: Lasse Collin Date: 2011-07-31 11:01:47 +0300 Fix exit status of "xzdiff foo.xz bar.xz". xzdiff was clobbering the exit status from diff in a case statement used to analyze the exit statuses from "xz" when its operands were two compressed files. Save and restore diff's exit status to fix this. The bug is inherited from zdiff in GNU gzip and was fixed there on 2009-10-09. Thanks to Jonathan Nieder for the patch and to Peter Pallinger for reporting the bug. src/scripts/xzdiff.in | 2 ++ tests/Makefile.am | 4 +++- tests/test_scripts.sh | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) commit 324cde7a864f4506c32ae7846d688c359a83fe65 Author: Lasse Collin Date: 2011-06-16 12:15:29 +0300 liblzma: Remove unneeded semicolon. src/liblzma/lz/lz_encoder_hash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 492c86345551a51a29bf18e55fe55a5e86f169ce Author: Lasse Collin Date: 2011-05-28 19:24:56 +0300 Build: Make configure print if symbol versioning is enabled or not. configure.ac | 2 ++ 1 file changed, 2 insertions(+) commit fc4d4436969bd4d71b704d400a165875e596034a Author: Lasse Collin Date: 2011-05-28 16:43:26 +0300 Don't call close(-1) in tuklib_open_stdxxx() on error. Thanks to Jim Meyering. src/common/tuklib_open_stdxxx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) commit bd35d903a04c4d388adb4065b0fa271302380895 Author: Lasse Collin Date: 2011-05-28 15:55:39 +0300 liblzma: Use symbol versioning. Symbol versioning is enabled by default on GNU/Linux, other GNU-based systems, and FreeBSD. I'm not sure how stable this is, so it may need backward-incompatible changes before the next release. The idea is that alpha and beta symbols are considered unstable and require recompiling the applications that use those symbols. Once a symbol is stable, it may get extended with new features in ways that don't break compatibility with older ABI & API. The mydist target runs validate_map.sh which should catch some probable problems in liblzma.map. Otherwise I would forget to update the map file for new releases. Makefile.am | 1 + configure.ac | 21 +++++++++ src/liblzma/Makefile.am | 6 +++ src/liblzma/liblzma.map | 105 ++++++++++++++++++++++++++++++++++++++++++++ src/liblzma/validate_map.sh | 68 ++++++++++++++++++++++++++++ 5 files changed, 201 insertions(+) commit afbb244362c9426a37ce4eb9d54aab768da3adad Author: Lasse Collin Date: 2011-05-28 09:46:46 +0300 Translations: Update the Italian translation. Thanks to Milo Casagrande. po/it.po | 365 +++++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 216 insertions(+), 149 deletions(-) commit 79bef85e0543c0c3723281c3c817616c6cec343b Author: Lasse Collin Date: 2011-05-28 08:46:04 +0300 Tests: Add a test file for the bug in the previous commit. tests/files/README | 4 ++++ tests/files/bad-1-block_header-6.xz | Bin 0 -> 72 bytes 2 files changed, 4 insertions(+) commit c0297445064951807803457dca1611b3c47e7f0f Author: Lasse Collin Date: 2011-05-27 22:25:44 +0300 xz: Fix error handling in xz -lvv. It could do an invalid free() and read past the end of the uninitialized filters array. src/xz/list.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) commit 8bd91918ac50731f00b1a2a48072980572eb2ff9 Author: Lasse Collin Date: 2011-05-27 22:09:49 +0300 liblzma: Handle allocation failures correctly in lzma_index_init(). Thanks to Jim Meyering. src/liblzma/common/index.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) commit fe00f95828ef5627721b57e054f7eb2d42a2c961 Author: Lasse Collin Date: 2011-05-24 00:23:46 +0300 Build: Fix checking for system-provided SHA-256. configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 21b45b9bab541f419712cbfd473ccc31802e0397 Author: Lasse Collin Date: 2011-05-23 18:30:30 +0300 Build: Set GZIP_ENV=-9n in top-level Makefile.am. Makefile.am | 3 +++ 1 file changed, 3 insertions(+) commit 48053e8a4550233af46359024538bff90c870ab1 Author: Lasse Collin Date: 2011-05-22 16:42:11 +0300 Update NEWS for 5.0.3. NEWS | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) commit bba37df2c9e54ad773e15ff00a09d2d6989fb3b2 Author: Lasse Collin Date: 2011-05-21 16:28:44 +0300 Add French translation. It is known that the BCJ filter --help text is only partially translated. po/LINGUAS | 1 + po/fr.po | 864 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 865 insertions(+) commit 4161d7634965a7a287bf208dcd79f6185f448fe8 Author: Lasse Collin Date: 2011-05-21 15:12:10 +0300 xz: Translate also the string used to print the program name. French needs a space before a colon, e.g. "xz : foo error". src/xz/message.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) commit b94aa0c8380cdb18cddb33440d625474c16643cf Author: Lasse Collin Date: 2011-05-21 15:08:44 +0300 liblzma: Try to use SHA-256 from the operating system. If the operating system libc or other base libraries provide SHA-256, use that instead of our own copy. Note that this doesn't use OpenSSL or libgcrypt or such libraries to avoid creating dependencies to other packages. This supports at least FreeBSD, NetBSD, OpenBSD, Solaris, MINIX, and Darwin. They all provide similar but not identical SHA-256 APIs; everyone is a little different. Thanks to Wim Lewis for the original patch, improvements, and testing. configure.ac | 54 +++++++++++++++++++++++++++ src/liblzma/check/Makefile.inc | 2 + src/liblzma/check/check.h | 83 ++++++++++++++++++++++++++++++++++++++---- 3 files changed, 131 insertions(+), 8 deletions(-) commit f004128678d43ea10b4a6401aa184cf83252d6ec Author: Lasse Collin Date: 2011-05-17 12:52:18 +0300 Don't use clockid_t in mythread.h when clock_gettime() isn't available. Thanks to Wim Lewis for the patch. src/common/mythread.h | 2 ++ 1 file changed, 2 insertions(+) commit f779516f42ebd2db47a5b7d6143459bf7737cf2f Author: Lasse Collin Date: 2011-05-17 12:26:28 +0300 Update THANKS. THANKS | 3 +++ 1 file changed, 3 insertions(+) commit 830ba587775bb562f6eaf05cad61bf669d1f8892 Author: Lasse Collin Date: 2011-05-17 12:21:33 +0300 Update INSTALL with a note about linker problem on OpenSolaris x86. INSTALL | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) commit ec7106309c8060e9c646dba20c4f15689a0bbb04 Author: Lasse Collin Date: 2011-05-17 12:01:37 +0300 Build: Fix initialization of enable_check_* variables in configure.ac. This doesn't matter much in practice since it is unlikely that anyone would have such environment variable names. Thanks to Wim Lewis. configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 4c6e146df99696920f12410fb17754412797ef36 Author: Lasse Collin Date: 2011-05-17 11:54:38 +0300 Add underscores to attributes (__attribute((__foo__))). src/liblzma/common/alone_decoder.c | 2 +- src/liblzma/common/alone_encoder.c | 2 +- src/liblzma/common/block_encoder.c | 2 +- src/liblzma/common/common.c | 2 +- src/liblzma/common/common.h | 2 +- src/liblzma/common/index_decoder.c | 9 +++++---- src/liblzma/common/index_encoder.c | 11 ++++++----- src/liblzma/delta/delta_encoder.c | 2 +- src/liblzma/lz/lz_decoder.c | 2 +- src/liblzma/lz/lz_encoder.c | 2 +- src/liblzma/simple/arm.c | 2 +- src/liblzma/simple/armthumb.c | 2 +- src/liblzma/simple/ia64.c | 2 +- src/liblzma/simple/powerpc.c | 2 +- src/liblzma/simple/simple_coder.c | 2 +- src/liblzma/simple/sparc.c | 2 +- src/lzmainfo/lzmainfo.c | 4 ++-- src/xz/coder.c | 2 +- src/xz/hardware.h | 2 +- src/xz/message.c | 2 +- src/xz/message.h | 18 +++++++++--------- src/xz/options.c | 6 +++--- src/xz/signals.c | 2 +- src/xz/util.h | 6 +++--- src/xzdec/xzdec.c | 6 +++--- 25 files changed, 49 insertions(+), 47 deletions(-) commit 7a480e485938884ef3021b48c3b0b9f9699dc9b6 Author: Lasse Collin Date: 2011-05-01 12:24:23 +0300 xz: Fix input file position when --single-stream is used. Now the following works as you would expect: echo foo | xz > foo.xz echo bar | xz >> foo.xz ( xz -dc --single-stream ; xz -dc --single-stream ) < foo.xz Note that it doesn't work if the input is not seekable or if there is Stream Padding between the concatenated .xz Streams. src/xz/coder.c | 1 + src/xz/file_io.c | 15 +++++++++++++++ src/xz/file_io.h | 13 +++++++++++++ 3 files changed, 29 insertions(+) commit c29e6630c1450c630c4e7b783bdd76515db9004c Author: Lasse Collin Date: 2011-05-01 12:15:51 +0300 xz: Print the maximum number of worker threads in xz -vv. src/xz/coder.c | 4 ++++ 1 file changed, 4 insertions(+) commit 0b77c4a75158ccc416b07d6e81df8ee0abaea720 Author: Lasse Collin Date: 2011-04-19 10:44:48 +0300 Build: Warn if no supported method to detect the number of CPU cores. configure.ac | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) commit e4622df9ab4982f8faa53d85b17be66216175a58 Author: Lasse Collin Date: 2011-04-19 09:55:06 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 9c1b05828a88eff54409760b92162c7cc2c7cff6 Author: Lasse Collin Date: 2011-04-19 09:20:44 +0300 Fix portability problems in mythread.h. Use gettimeofday() if clock_gettime() isn't available (e.g. Darwin). The test for availability of pthread_condattr_setclock() and CLOCK_MONOTONIC was incorrect. Instead of fixing the #ifdefs, use an Autoconf test. That way if there exists a system that supports them but doesn't specify the matching POSIX #defines, the features will still get detected. Don't try to use pthread_sigmask() on OpenVMS. It doesn't have that function. Guard mythread.h against being #included multiple times. configure.ac | 7 +++++++ src/common/mythread.h | 31 +++++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 4 deletions(-) commit 3de00cc75da7b0e7b65e84c62b5351e231f501e9 Author: Lasse Collin Date: 2011-04-18 19:35:49 +0300 Update THANKS. THANKS | 2 ++ 1 file changed, 2 insertions(+) commit bd5002f5821e3d1b04f2f56989e4a19318e73633 Author: Martin Väth Date: 2011-04-15 04:54:49 -0400 xzgrep: fix typo in $0 parsing Reported-by: Diego Elio Pettenò Signed-off-by: Martin Väth Signed-off-by: Mike Frysinger src/scripts/xzgrep.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 6ef4eabc0acc49e1bb9dc68064706e19fa9fcf48 Author: Lasse Collin Date: 2011-04-12 12:48:31 +0300 Bump the version number to 5.1.1alpha and liblzma soname to 5.0.99. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) commit 9a4377be0d21e597c66bad6c7452873aebfb3c1c Author: Lasse Collin Date: 2011-04-12 12:42:37 +0300 Put the unstable APIs behind #ifdef LZMA_UNSTABLE. This way people hopefully won't complain if these APIs change and break code that used an older API. src/liblzma/api/lzma/container.h | 4 ++++ src/liblzma/common/common.h | 2 ++ src/xz/private.h | 2 ++ 3 files changed, 8 insertions(+) commit 3e321a3acd50002cf6fdfd259e910f56d3389bc3 Author: Lasse Collin Date: 2011-04-12 11:59:49 +0300 Remove doubled words from documentation and comments. Spot candidates by running these commands: git ls-files |xargs perl -0777 -n \ -e 'while (/\b(then?|[iao]n|i[fst]|but|f?or|at|and|[dt]o)\s+\1\b/gims)' \ -e '{$n=($` =~ tr/\n/\n/ + 1); ($v=$&)=~s/\n/\\n/g; print "$ARGV:$n:$v\n"}' Thanks to Jim Meyering for the original patch. doc/lzma-file-format.txt | 4 ++-- src/liblzma/common/alone_encoder.c | 2 +- src/liblzma/lzma/lzma2_encoder.c | 2 +- src/xz/file_io.c | 2 +- src/xz/xz.1 | 2 +- windows/INSTALL-Windows.txt | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) commit d91a84b534b012d19474f2fda1fbcaef873e1ba4 Author: Lasse Collin Date: 2011-04-12 11:46:01 +0300 Update NEWS. NEWS | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) commit 14e6ad8cfe0165c1a8beeb5b2a1536558b29b0a1 Author: Lasse Collin Date: 2011-04-12 11:45:40 +0300 Update TODO. TODO | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) commit 70e750f59793f9b5cd306a5adce9b8e427739e04 Author: Lasse Collin Date: 2011-04-12 11:08:55 +0300 xz: Update the man page about threading. src/xz/xz.1 | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) commit 24e0406c0fb7494d2037dec033686faf1bf67068 Author: Lasse Collin Date: 2011-04-11 22:06:03 +0300 xz: Add support for threaded compression. src/xz/args.c | 3 +- src/xz/coder.c | 202 +++++++++++++++++++++++++++++++++++---------------------- 2 files changed, 125 insertions(+), 80 deletions(-) commit de678e0c924aa79a19293a8a6ed82e8cb6572a42 Author: Lasse Collin Date: 2011-04-11 22:03:30 +0300 liblzma: Add lzma_stream_encoder_mt() for threaded compression. This is the simplest method to do threading, which splits the uncompressed data into blocks and compresses them independently from each other. There's room for improvement especially to reduce the memory usage, but nevertheless, this is a good start. configure.ac | 1 + src/liblzma/api/lzma/container.h | 163 +++++ src/liblzma/common/Makefile.inc | 7 + src/liblzma/common/common.c | 9 +- src/liblzma/common/common.h | 14 + src/liblzma/common/outqueue.c | 180 ++++++ src/liblzma/common/outqueue.h | 155 +++++ src/liblzma/common/stream_encoder_mt.c | 1011 ++++++++++++++++++++++++++++++++ 8 files changed, 1539 insertions(+), 1 deletion(-) commit 25fe729532cdf4b8fed56a4519b73cf31efaec50 Author: Lasse Collin Date: 2011-04-11 21:15:07 +0300 liblzma: Add the forgotten lzma_lzma2_block_size(). This should have been in 5eefc0086d24a65e136352f8c1d19cefb0cbac7a. src/liblzma/lzma/lzma2_encoder.c | 10 ++++++++++ src/liblzma/lzma/lzma2_encoder.h | 2 ++ 2 files changed, 12 insertions(+) commit 91afb785a1dee34862078d9bf844ef12b8cc3e35 Author: Lasse Collin Date: 2011-04-11 21:04:13 +0300 liblzma: Document lzma_easy_(enc|dec)oder_memusage() better too. src/liblzma/api/lzma/container.h | 9 +++++++++ 1 file changed, 9 insertions(+) commit 4a9905302a9e4a1601ae09d650d3f08ce98ae9ee Author: Lasse Collin Date: 2011-04-11 20:59:07 +0300 liblzma: Document lzma_raw_(enc|dec)oder_memusage() better. It didn't mention the return value that is used if an error occurs. src/liblzma/api/lzma/filter.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) commit 0badb0b1bd649163322783b0bd9e590b4bc7a93d Author: Lasse Collin Date: 2011-04-11 19:28:18 +0300 liblzma: Use memzero() to initialize supported_actions[]. This is cleaner and makes it simpler to add new members to lzma_action enumeration. src/liblzma/common/common.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) commit a7934c446a58e20268689899d2a39f50e571f251 Author: Lasse Collin Date: 2011-04-11 19:26:27 +0300 liblzma: API comment about lzma_allocator with threaded coding. src/liblzma/api/lzma/base.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) commit 5eefc0086d24a65e136352f8c1d19cefb0cbac7a Author: Lasse Collin Date: 2011-04-11 19:16:30 +0300 liblzma: Add an internal function lzma_mt_block_size(). This is based lzma_chunk_size() that was included in some development version of liblzma. src/liblzma/common/filter_encoder.c | 46 ++++++++++++++++++------------------- src/liblzma/common/filter_encoder.h | 4 ++-- 2 files changed, 24 insertions(+), 26 deletions(-) commit d1199274758049fc523d98c5b860ff814a799eec Author: Lasse Collin Date: 2011-04-11 13:59:50 +0300 liblzma: Don't create an empty Block in lzma_stream_buffer_encode(). Empty Block was created if the input buffer was empty. Empty Block wastes a few bytes of space, but more importantly it triggers a bug in XZ Utils 5.0.1 and older when trying to decompress such a file. 5.0.1 and older consider such files to be corrupt. I thought that no encoder creates empty Blocks when releasing 5.0.2 but I was wrong. src/liblzma/common/stream_buffer_encoder.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) commit 3b22fc2c87ec85fcdd385c163b68fc49c97aa848 Author: Lasse Collin Date: 2011-04-11 13:28:40 +0300 liblzma: Fix API docs to mention LZMA_UNSUPPORTED_CHECK. This return value was missing from the API comments of four functions. src/liblzma/api/lzma/block.h | 1 + src/liblzma/api/lzma/container.h | 3 +++ 2 files changed, 4 insertions(+) commit 71b9380145dccf001f22e66a06b9d508905c25ce Author: Lasse Collin Date: 2011-04-11 13:21:28 +0300 liblzma: Validate encoder arguments better. The biggest problem was that the integrity check type wasn't validated, and e.g. lzma_easy_buffer_encode() would create a corrupt .xz Stream if given an unsupported Check ID. Luckily applications don't usually try to use an unsupport Check ID, so this bug is unlikely to cause many real-world problems. src/liblzma/common/block_buffer_encoder.c | 18 ++++++++++++------ src/liblzma/common/block_encoder.c | 5 +++++ src/liblzma/common/stream_buffer_encoder.c | 3 +++ 3 files changed, 20 insertions(+), 6 deletions(-) commit ec7e3dbad704268825fc48f0bdd4577bc46b4f13 Author: Lasse Collin Date: 2011-04-11 09:57:30 +0300 xz: Move the description of --block-size in --long-help. src/xz/message.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) commit cd3086ff443bb282bdf556919c28b3e3cbed8169 Author: Lasse Collin Date: 2011-04-11 09:55:35 +0300 Docs: Document --single-stream and --block-size. src/xz/xz.1 | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) commit fb64a4924334e3c440865710990fe08090f2fed0 Author: Lasse Collin Date: 2011-04-11 09:27:57 +0300 liblzma: Make lzma_stream_encoder_init() static (second try). It's an internal function and it's not needed by anything outside stream_encoder.c. src/liblzma/common/Makefile.inc | 1 - src/liblzma/common/easy_encoder.c | 1 - src/liblzma/common/stream_encoder.c | 13 ++++++------- src/liblzma/common/stream_encoder.h | 23 ----------------------- 4 files changed, 6 insertions(+), 32 deletions(-) commit a34730cf6af4d33a4057914e57227b6dfde6567e Author: Lasse Collin Date: 2011-04-11 08:31:42 +0300 Revert "liblzma: Make lzma_stream_encoder_init() static." This reverts commit 352ac82db5d3f64585c07b39e4759388dec0e4d7. I don't know what I was thinking. src/liblzma/common/Makefile.inc | 1 + src/liblzma/common/stream_encoder.c | 9 +++++---- src/liblzma/common/stream_encoder.h | 23 +++++++++++++++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) commit 9f0a806aef7ea79718e3f1f2baf3564295229a27 Author: Lasse Collin Date: 2011-04-10 21:23:21 +0300 Revise mythread.h. This adds: - mythread_sync() macro to create synchronized blocks - mythread_cond structure and related functions and macros for condition variables with timed waiting using a relative timeout - mythread_create() to create a thread with all signals blocked Some of these wouldn't need to be inline functions, but I'll keep them this way for now for simplicity. For timed waiting on a condition variable, librt is now required on some systems to use clock_gettime(). configure.ac was updated to handle this. configure.ac | 1 + src/common/mythread.h | 200 +++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 181 insertions(+), 20 deletions(-) commit 352ac82db5d3f64585c07b39e4759388dec0e4d7 Author: Lasse Collin Date: 2011-04-10 20:37:36 +0300 liblzma: Make lzma_stream_encoder_init() static. It's an internal function and it's not needed by anything outside stream_encoder.c. src/liblzma/common/Makefile.inc | 1 - src/liblzma/common/stream_encoder.c | 9 ++++----- src/liblzma/common/stream_encoder.h | 23 ----------------------- 3 files changed, 4 insertions(+), 29 deletions(-) commit 9e807fe3fe79618ac48f58207cf7082ea20a6928 Author: Lasse Collin Date: 2011-04-10 14:58:10 +0300 DOS: Update the docs and include notes about 8.3 filenames. dos/{README => INSTALL.txt} | 13 +---- dos/README.txt | 123 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 11 deletions(-) commit ebd54dbd6e481d31e80757f900ac8109ad1423c6 Author: Lasse Collin Date: 2011-04-10 13:09:42 +0300 xz/DOS: Add experimental 8.3 filename support. This is incompatible with the 8.3 support patch made by Juan Manuel Guerrero. I think this one is nicer, but I need to get feedback from DOS users before saying that this is the final version of 8.3 filename support. src/xz/suffix.c | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 167 insertions(+), 9 deletions(-) commit cd4fe97852bcaeffe674ee51b4613709292a0972 Author: Lasse Collin Date: 2011-04-10 12:47:47 +0300 xz/DOS: Be more careful with the destination file. Try to avoid overwriting the source file if --force is used and the generated destination filename refers to the source file. This can happen with 8.3 filenames where extra characters are ignored. If the generated output file refers to a special file like "con" or "prn", refuse to write to it even if --force is used. src/xz/file_io.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) commit 607f9f98ae5ef6d49f4c21c806d462bf6b3d6796 Author: Lasse Collin Date: 2011-04-09 18:29:30 +0300 Update THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit fca396b37410d272b754843a5dc13847be443a3a Author: Lasse Collin Date: 2011-04-09 18:28:58 +0300 liblzma: Add missing #ifdefs to filter_common.c. Passing --disable-decoders to configure broke a few encoders due to missing #ifdefs in filter_common.c. Thanks to Jason Gorski for the patch. src/liblzma/common/filter_common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit b03f6cd3ebadd675f2cc9d518cb26fa860269447 Author: Lasse Collin Date: 2011-04-09 15:24:59 +0300 xz: Avoid unneeded fstat() on DOS-like systems. src/xz/file_io.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) commit 335fe260a81f61ec99ff5940df733b4c50aedb7c Author: Lasse Collin Date: 2011-04-09 15:11:13 +0300 xz: Minor internal changes to handling of --threads. Now it always defaults to one thread. Maybe this will change again if a threading method is added that doesn't affect memory usage. src/xz/args.c | 4 ++-- src/xz/hardware.c | 24 ++++++++++++------------ src/xz/hardware.h | 9 ++++----- 3 files changed, 18 insertions(+), 19 deletions(-) commit 9edd6ee895fbe71d245a173f48e511f154a99875 Author: Lasse Collin Date: 2011-04-08 17:53:05 +0300 xz: Change size_t to uint32_t in a few places. src/xz/coder.c | 6 +++--- src/xz/coder.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) commit 411013ea4506a6df24d35a060fcbd73a57b73eb3 Author: Lasse Collin Date: 2011-04-08 17:48:41 +0300 xz: Fix a typo in a comment. src/xz/coder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit b34c5ce4b22e8d7b81f9895d15054af41d17f805 Author: Lasse Collin Date: 2011-04-05 22:41:33 +0300 liblzma: Use TUKLIB_GNUC_REQ to check GCC version in sha256.c. src/liblzma/check/sha256.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) commit db33117cc85c17e0b897b5312bd5eb43aac41c03 Author: Lasse Collin Date: 2011-04-05 17:12:20 +0300 Build: Upgrade m4/acx_pthread.m4 to the latest version. It was renamed to ax_pthread.m4 in Autoconf Archive. configure.ac | 2 +- m4/{acx_pthread.m4 => ax_pthread.m4} | 170 ++++++++++++++++++----------------- 2 files changed, 88 insertions(+), 84 deletions(-) commit 1039bfcfc098b69d56ecb39d198a092552eacf6d Author: Lasse Collin Date: 2011-04-05 15:27:26 +0300 xz: Use posix_fadvise() if it is available. configure.ac | 3 +++ src/xz/file_io.c | 15 +++++++++++++++ 2 files changed, 18 insertions(+) commit 1ef3cf44a8eb9512480af4482a5232ea08363b14 Author: Lasse Collin Date: 2011-04-05 15:13:29 +0300 xz: Call lzma_end(&strm) before exiting if debugging is enabled. src/xz/coder.c | 10 ++++++++++ src/xz/coder.h | 5 +++++ src/xz/main.c | 4 ++++ 3 files changed, 19 insertions(+) commit bd432015d33dcade611d297bc01eb0700088ef6c Author: Lasse Collin Date: 2011-04-02 14:49:56 +0300 liblzma: Fix a memory leak in stream_encoder.c. It leaks old filter options structures (hundred bytes or so) every time the lzma_stream is reinitialized. With the xz tool, this happens when compressing multiple files. src/liblzma/common/stream_encoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 16889013214e7620d204b6e6c1bf9f3103a13655 Author: Lasse Collin Date: 2011-04-01 08:47:20 +0300 Updated NEWS for 5.0.2. NEWS | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) commit 85cdf7dd4e97b078e7b929e47f55a7f1da36010f Author: Lasse Collin Date: 2011-03-31 15:06:58 +0300 Update INSTALL with another note about IRIX. INSTALL | 4 ++++ 1 file changed, 4 insertions(+) commit c3f4995586873d6a4fb7e451010a128571a9a370 Author: Lasse Collin Date: 2011-03-31 12:22:55 +0300 Tests: Add a new file to test empty LZMA2 streams. tests/files/README | 4 ++++ tests/files/good-1-lzma2-5.xz | Bin 0 -> 52 bytes 2 files changed, 4 insertions(+) commit 0d21f49a809dc2088da6cc0da7f948404df7ecfa Author: Lasse Collin Date: 2011-03-31 11:54:48 +0300 liblzma: Fix decoding of LZMA2 streams having no uncompressed data. The decoder considered empty LZMA2 streams to be corrupt. This shouldn't matter much with .xz files, because no encoder creates empty LZMA2 streams in .xz. This bug is more likely to cause problems in applications that use raw LZMA2 streams. src/liblzma/lzma/lzma2_decoder.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) commit 40277998cb9bad564ce4827aff152e6e1c904dfa Author: Lasse Collin Date: 2011-03-24 01:42:49 +0200 Scripts: Better fix for xzgrep. Now it uses "grep -q". Thanks to Gregory Margo. src/scripts/xzgrep.in | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) commit 2118733045ad0ca183a3f181a0399baf876983a6 Author: Lasse Collin Date: 2011-03-24 01:22:18 +0200 Updated THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit c7210d9a3fca6f31a57208bfddfc9ab20a2e097a Author: Lasse Collin Date: 2011-03-24 01:21:32 +0200 Scripts: Fix xzgrep -l. It didn't work at all. It tried to use the -q option for grep, but it appended it after "--". This works around it by redirecting to /dev/null. The downside is that this can be slower with big files compared to proper use of "grep -q". Thanks to Gregory Margo. src/scripts/xzgrep.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit 4eb83e32046a6d670862bc91c3d82530963b455e Author: Lasse Collin Date: 2011-03-19 13:08:22 +0200 Scripts: Add lzop (.lzo) support to xzdiff and xzgrep. src/scripts/xzdiff.1 | 6 ++++-- src/scripts/xzdiff.in | 22 ++++++++++++++-------- src/scripts/xzgrep.1 | 11 +++++++---- src/scripts/xzgrep.in | 5 +++-- 4 files changed, 28 insertions(+), 16 deletions(-) commit 923b22483bd9356f3219b2b784d96f455f4dc499 Author: Lasse Collin Date: 2011-03-18 19:10:30 +0200 xz: Add --block-size=SIZE. This uses LZMA_FULL_FLUSH every SIZE bytes of input. Man page wasn't updated yet. src/xz/args.c | 7 +++++++ src/xz/coder.c | 50 ++++++++++++++++++++++++++++++++++++++++---------- src/xz/coder.h | 3 +++ src/xz/message.c | 4 ++++ 4 files changed, 54 insertions(+), 10 deletions(-) commit 57597d42ca1740ad506437be168d800a50f1a0ad Author: Lasse Collin Date: 2011-03-18 18:19:19 +0200 xz: Add --single-stream. This can be useful when there is garbage after the compressed stream (.xz, .lzma, or raw stream). Man page wasn't updated yet. src/xz/args.c | 6 ++++++ src/xz/coder.c | 11 +++++++++-- src/xz/coder.h | 3 +++ src/xz/message.c | 6 +++++- 4 files changed, 23 insertions(+), 3 deletions(-) commit 96f94bc925d579a700147fa5d7793b64d69cfc18 Author: Lasse Collin Date: 2011-02-04 22:49:31 +0200 xz: Clean up suffix.c. struct suffix_pair isn't needed in compresed_name() so get rid of it there. src/xz/suffix.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) commit 8930c7ae3f82bdae15aa129f01de08be23d7e8d7 Author: Lasse Collin Date: 2011-02-04 11:29:47 +0200 xz: Check if the file already has custom suffix when compressing. Now "xz -S .test foo.test" refuses to compress the file because it already has the suffix .test. The man page had it documented this way already. src/xz/suffix.c | 9 +++++++++ 1 file changed, 9 insertions(+) commit 940d5852c6cf08abccc6befd9d1b5411c9076a58 Author: Lasse Collin Date: 2011-02-02 23:01:51 +0200 Updated THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 4ebe65f839613f27f127bab7b8c347d982330ee3 Author: Lasse Collin Date: 2011-02-02 23:00:33 +0200 Translations: Add Polish translation. Thanks to Jakub Bogusz. po/LINGUAS | 1 + po/pl.po | 825 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 826 insertions(+) commit fc1d292dca1925dfd17174f443f91a696ecd5bf8 Author: Lasse Collin Date: 2011-02-02 22:24:00 +0200 Updated THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit 6dd061adfd2775428b079eb03d6fd47d7c0f1ffe Merge: 9d542ce 5fbce0b Author: Lasse Collin Date: 2011-02-06 20:13:01 +0200 Merge commit '5fbce0b8d96dc96775aa0215e3581addc830e23d' commit 5fbce0b8d96dc96775aa0215e3581addc830e23d Author: Lasse Collin Date: 2011-01-28 20:16:57 +0200 Update NEWS for 5.0.1. NEWS | 14 ++++++++++++++ 1 file changed, 14 insertions(+) commit 03ebd1bbb314f9f204940219a835c883bf442475 Author: Lasse Collin Date: 2011-01-26 12:19:08 +0200 xz: Fix --force on setuid/setgid/sticky and multi-hardlink files. xz didn't compress setuid/setgid/sticky files and files with multiple hard links even with --force. This bug was introduced in 23ac2c44c3ac76994825adb7f9a8f719f78b5ee4. Thanks to Charles Wilson. src/xz/file_io.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) commit 9d542ceebcbe40b174169c132ccfcdc720ca7089 Merge: 4f2c69a 7bd0a5e Author: Lasse Collin Date: 2011-01-19 11:45:35 +0200 Merge branch 'v5.0' commit 7bd0a5e7ccc354f7c2e95c8bc27569c820f6a136 Author: Lasse Collin Date: 2011-01-18 21:25:24 +0200 Updated THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit f71c4e16e913f660977526f0ef8d2acdf458d7c9 Author: Lasse Collin Date: 2011-01-18 21:23:50 +0200 Add alloc_size and malloc attributes to a few functions. Thanks to Cristian Rodríguez for the original patch. src/common/sysdefs.h | 6 ++++++ src/liblzma/common/common.h | 2 +- src/xz/util.h | 5 +++-- 3 files changed, 10 insertions(+), 3 deletions(-) commit 316cbe24465143edde8f6ffb7532834b7b2ea93f Author: Lasse Collin Date: 2010-12-13 16:36:33 +0200 Scripts: Fix gzip and bzip2 support in xzdiff. src/scripts/xzdiff.in | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) commit 4f2c69a4e3e0aee2e37b0b1671d34086e20c8ac6 Merge: adb89e6 9311774 Author: Lasse Collin Date: 2010-12-12 23:13:22 +0200 Merge branch 'v5.0' commit 9311774c493c19deab51ded919dcd2e9c4aa2829 Author: Lasse Collin Date: 2010-12-12 21:23:55 +0200 Build: Enable ASM on DJGPP by default. configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit 4a42aaee282fc73b482581684d65110506d5efdd Author: Lasse Collin Date: 2010-12-12 16:09:42 +0200 Updated THANKS. THANKS | 1 + 1 file changed, 1 insertion(+) commit ce56f63c41ee210e6308090eb6d49221fdf67d6c Author: Lasse Collin Date: 2010-12-12 16:07:11 +0200 Add missing PRIx32 and PRIx64 compatibility definitions. This fixes portability to systems that lack C99 inttypes.h. Thanks to Juan Manuel Guerrero. src/common/sysdefs.h | 9 +++++++++ 1 file changed, 9 insertions(+) commit e6baedddcf54e7da049ebc49183565b99facd4c7 Author: Lasse Collin Date: 2010-12-12 14:50:04 +0200 DOS-like: Treat \ and : as directory separators in addition to /. Juan Manuel Guerrero had fixed this in his XZ Utils port to DOS/DJGPP. The bug affects also Windows and OS/2. src/xz/suffix.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) commit adb89e68d43a4cadb0c215b45ef7a75737c9c3ec Merge: 7c24e0d b7afd3e Author: Lasse Collin Date: 2010-12-07 18:53:04 +0200 Merge branch 'v5.0' commit b7afd3e22a8fac115b75c738d40d3eb1de7e286f Author: Lasse Collin Date: 2010-12-07 18:52:04 +0200 Translations: Fix Czech translation of "sparse file". Thanks to Petr Hubený and Marek Černocký. po/cs.po | 88 ++++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 44 insertions(+), 44 deletions(-) commit 7c24e0d1b8a2e86e9263b0d56d39621e01aed7af Merge: b4d42f1 3e56470 Author: Lasse Collin Date: 2010-11-15 14:33:01 +0200 Merge branch 'v5.0' commit 3e564704bc6f463cb2db11e3f3f0dbd71d85992e Author: Lasse Collin Date: 2010-11-15 14:28:26 +0200 liblzma: Document the return value of lzma_lzma_preset(). src/liblzma/api/lzma/lzma.h | 3 +++ 1 file changed, 3 insertions(+) commit 2964d8d691ed92abdcf214888d79ad6d79774735 Author: Jonathan Nieder Date: 2010-11-12 15:22:13 -0600 Simplify paths in generated API docs Currently the file list generated by Doxygen has src/ at the beginning of each path. Paths like common/sysdefs.h and liblzma/api/lzma.h are easier to read without such a prefix. Builds from a separate build directory with mkdir build cd build ../configure doxygen Doxyfile include an even longer prefix /home/someone/src/xz/src; this patch has the nice side-effect of eliminating that prefix, too. Fixes: http://bugs.debian.org/572273 Doxyfile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) commit b4d42f1a7120e2cefeb2f14425efe2ca6db85416 Author: Anders F Bjorklund Date: 2010-11-05 12:56:11 +0100 add build script for macosx universal macosx/build.sh | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) commit 15ee6935abe4a2fc76639ee342ca2e69af3e0ad6 Author: Lasse Collin Date: 2010-11-04 18:31:40 +0200 Update the copies of GPLv2 and LGPLv2.1 from gnu.org. There are only a few white space changes. COPYING.GPLv2 | 14 +++++++------- COPYING.LGPLv2.1 | 16 +++++++--------- 2 files changed, 14 insertions(+), 16 deletions(-) commit 8e355f7fdbeee6fe394eb02a28f267ce99a882a2 Merge: 974ebe6 37c2565 Author: Lasse Collin Date: 2010-10-26 15:53:06 +0300 Merge branch 'v5.0' commit 37c25658efd25b034266daf87cd381d20d1df776 Author: Lasse Collin Date: 2010-10-26 15:48:48 +0300 Build: Copy the example programs to $docdir/examples. The example programs by Daniel Mealha Cabrita were included in the git repository, but I had forgot to add them to Makefile.am. Thus, they didn't get included in the source package at all by "make dist". Makefile.am | 5 +++++ windows/build.bash | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) commit 974ebe63497bdf0d262e06474f0dd5a70b1dd000 Author: Lasse Collin Date: 2010-10-26 10:36:41 +0300 liblzma: Rename a few variables and constants. This has no semantic changes. I find the new names slightly more logical and they match the names that are already used in XZ Embedded. The name fastpos wasn't changed (not worth the hassle). src/liblzma/lzma/fastpos.h | 55 +++++------ src/liblzma/lzma/lzma2_encoder.c | 2 +- src/liblzma/lzma/lzma_common.h | 45 ++++----- src/liblzma/lzma/lzma_decoder.c | 58 +++++------ src/liblzma/lzma/lzma_encoder.c | 56 +++++------ src/liblzma/lzma/lzma_encoder_optimum_fast.c | 9 +- src/liblzma/lzma/lzma_encoder_optimum_normal.c | 128 ++++++++++++------------- src/liblzma/lzma/lzma_encoder_private.h | 16 ++-- 8 files changed, 183 insertions(+), 186 deletions(-) commit 7c427ec38d016c0070a42315d752857e33792fc4 Author: Lasse Collin Date: 2010-10-25 12:59:25 +0300 Bump version 5.1.0alpha. src/liblzma/api/lzma/version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) commit e45929260cd902036efd40c5610a8d0a50d5712b Author: Lasse Collin Date: 2010-10-23 17:25:52 +0300 Build: Fix mydist rule when .git doesn't exist. Makefile.am | 1 + 1 file changed, 1 insertion(+) commit 6e1326fcdf6b6209949be57cfe3ad4b781b65168 Author: Lasse Collin Date: 2010-10-23 14:15:35 +0300 Add NEWS for 5.0.0. NEWS | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) commit b667a3ef6338a2c1db7b7706b1f6c99ea392221c Author: Lasse Collin Date: 2010-10-23 14:02:53 +0300 Bump version to 5.0.0 and liblzma version-info to 5:0:0. src/liblzma/Makefile.am | 2 +- src/liblzma/api/lzma/version.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/contrib/xz/README b/contrib/xz/README index 3f0c38dca6b8..110d242966ef 100644 --- a/contrib/xz/README +++ b/contrib/xz/README @@ -1,236 +1,236 @@ XZ Utils ======== 0. Overview 1. Documentation 1.1. Overall documentation 1.2. Documentation for command-line tools 1.3. Documentation for liblzma 2. Version numbering 3. Reporting bugs 4. Translations 5. Other implementations of the .xz format 6. Contact information 0. Overview ----------- XZ Utils provide a general-purpose data-compression library plus command-line tools. The native file format is the .xz format, but also the legacy .lzma format is supported. The .xz format supports multiple compression algorithms, which are called "filters" in the context of XZ Utils. The primary filter is currently LZMA2. With typical files, XZ Utils create about 30 % smaller files than gzip. To ease adapting support for the .xz format into existing applications and scripts, the API of liblzma is somewhat similar to the API of the popular zlib library. For the same reason, the command-line tool xz has a command-line syntax similar to that of gzip. When aiming for the highest compression ratio, the LZMA2 encoder uses a lot of CPU time and may use, depending on the settings, even hundreds of megabytes of RAM. However, in fast modes, the LZMA2 encoder competes with bzip2 in compression speed, RAM usage, and compression ratio. LZMA2 is reasonably fast to decompress. It is a little slower than gzip, but a lot faster than bzip2. Being fast to decompress means that the .xz format is especially nice when the same file will be decompressed very many times (usually on different computers), which is the case e.g. when distributing software packages. In such situations, it's not too bad if the compression takes some time, since that needs to be done only once to benefit many people. With some file types, combining (or "chaining") LZMA2 with an additional filter can improve the compression ratio. A filter chain may contain up to four filters, although usually only one or two are used. For example, putting a BCJ (Branch/Call/Jump) filter before LZMA2 in the filter chain can improve compression ratio of executable files. Since the .xz format allows adding new filter IDs, it is possible that some day there will be a filter that is, for example, much faster to compress than LZMA2 (but probably with worse compression ratio). Similarly, it is possible that some day there is a filter that will compress better than LZMA2. XZ Utils supports multithreaded compression. XZ Utils doesn't support multithreaded decompression yet. It has been planned though and taken into account when designing the .xz file format. In the future, files that were created in threaded mode can be decompressed in threaded mode too. 1. Documentation ---------------- 1.1. Overall documentation README This file INSTALL.generic Generic install instructions for those not familiar with packages using GNU Autotools INSTALL Installation instructions specific to XZ Utils PACKAGERS Information to packagers of XZ Utils COPYING XZ Utils copyright and license information COPYING.GPLv2 GNU General Public License version 2 COPYING.GPLv3 GNU General Public License version 3 COPYING.LGPLv2.1 GNU Lesser General Public License version 2.1 AUTHORS The main authors of XZ Utils THANKS Incomplete list of people who have helped making this software NEWS User-visible changes between XZ Utils releases ChangeLog Detailed list of changes (commit log) TODO Known bugs and some sort of to-do list Note that only some of the above files are included in binary packages. 1.2. Documentation for command-line tools The command-line tools are documented as man pages. In source code releases (and possibly also in some binary packages), the man pages are also provided in plain text (ASCII only) and PDF formats in the directory "doc/man" to make the man pages more accessible to those whose operating system doesn't provide an easy way to view man pages. 1.3. Documentation for liblzma The liblzma API headers include short docs about each function and data type as Doxygen tags. These docs should be quite OK as a quick reference. There are a few example/tutorial programs that should help in getting started with liblzma. In the source package the examples are in "doc/examples" and in binary packages they may be under "examples" in the same directory as this README. Since the liblzma API has similarities to the zlib API, some people may find it useful to read the zlib docs and tutorial too: http://zlib.net/manual.html http://zlib.net/zlib_how.html 2. Version numbering -------------------- The version number format of XZ Utils is X.Y.ZS: - X is the major version. When this is incremented, the library API and ABI break. - Y is the minor version. It is incremented when new features are added without breaking the existing API or ABI. An even Y indicates a stable release and an odd Y indicates unstable (alpha or beta version). - Z is the revision. This has a different meaning for stable and unstable releases: * Stable: Z is incremented when bugs get fixed without adding any new features. This is intended to be convenient for downstream distributors that want bug fixes but don't want any new features to minimize the risk of introducing new bugs. * Unstable: Z is just a counter. API or ABI of features added in earlier unstable releases having the same X.Y may break. - S indicates stability of the release. It is missing from the stable releases, where Y is an even number. When Y is odd, S is either "alpha" or "beta" to make it very clear that such versions are not stable releases. The same X.Y.Z combination is not used for more than one stability level, i.e. after X.Y.Zalpha, the next version can be X.Y.(Z+1)beta but not X.Y.Zbeta. 3. Reporting bugs ----------------- Naturally it is easiest for me if you already know what causes the unexpected behavior. Even better if you have a patch to propose. However, quite often the reason for unexpected behavior is unknown, so here are a few things to do before sending a bug report: 1. Try to create a small example how to reproduce the issue. 2. Compile XZ Utils with debugging code using configure switches --enable-debug and, if possible, --disable-shared. If you are using GCC, use CFLAGS='-O0 -ggdb3'. Don't strip the resulting binaries. 3. Turn on core dumps. The exact command depends on your shell; for example in GNU bash it is done with "ulimit -c unlimited", and in tcsh with "limit coredumpsize unlimited". 4. Try to reproduce the suspected bug. If you get "assertion failed" message, be sure to include the complete message in your bug report. If the application leaves a coredump, get a backtrace using gdb: $ gdb /path/to/app-binary # Load the app to the debugger. (gdb) core core # Open the coredump. (gdb) bt # Print the backtrace. Copy & paste to bug report. (gdb) quit # Quit gdb. Report your bug via email or IRC (see Contact information below). Don't send core dump files or any executables. If you have a small example file(s) (total size less than 256 KiB), please include it/them as an attachment. If you have bigger test files, put them online somewhere and include a URL to the file(s) in the bug report. Always include the exact version number of XZ Utils in the bug report. If you are using a snapshot from the git repository, use "git describe" to get the exact snapshot version. If you are using XZ Utils shipped in an operating system distribution, mention the distribution name, distribution version, and exact xz package version; if you cannot repeat the bug with the code compiled from unpatched source code, you probably need to report a bug to your distribution's bug tracking system. 4. Translations --------------- The xz command line tool and all man pages can be translated. The translations are handled via the Translation Project. If you wish to help translating xz, please join the Translation Project: https://translationproject.org/html/translators.html Several strings will change in a future version of xz so if you wish to start a new translation, look at the code in the xz git - repostiory instead of a 5.2.x release. + repository instead of a 5.2.x release. 5. Other implementations of the .xz format ------------------------------------------ 7-Zip and the p7zip port of 7-Zip support the .xz format starting from the version 9.00alpha. http://7-zip.org/ http://p7zip.sourceforge.net/ XZ Embedded is a limited implementation written for use in the Linux kernel, but it is also suitable for other embedded use. https://tukaani.org/xz/embedded.html 6. Contact information ---------------------- If you have questions, bug reports, patches etc. related to XZ Utils, contact Lasse Collin (in Finnish or English). I'm sometimes slow at replying. If you haven't got a reply within two weeks, assume that your email has got lost and resend it or use IRC. You can find me also from #tukaani on Freenode; my nick is Larhzu. The channel tends to be pretty quiet, so just ask your question and someone may wake up. diff --git a/contrib/xz/THANKS b/contrib/xz/THANKS index 4301f205c3f0..6b7a074220a7 100644 --- a/contrib/xz/THANKS +++ b/contrib/xz/THANKS @@ -1,134 +1,151 @@ Thanks ====== Some people have helped more, some less, but nevertheless everyone's help has been important. :-) In alphabetical order: - Mark Adler - H. Peter Anvin - Jeff Bastian - Nelson H. F. Beebe + - Karl Beldan - Karl Berry - Anders F. Björklund - Emmanuel Blot - Melanie Blower + - Alexander Bluhm - Martin Blumenstingl - Ben Boeckel - Jakub Bogusz + - Adam Borowski - Maarten Bosmans - Trent W. Buck + - Kevin R. Bulgrien - James Buren - David Burklund - Daniel Mealha Cabrita - Milo Casagrande - Marek Černocký - Tomer Chachamu + - Vitaly Chikunov - Antoine Cœur - Gabi Davar - Chris Donawa - Andrew Dudman - Markus Duft - İsmail Dönmez + - Paul Eggert - Robert Elz - Gilles Espinasse - Denis Excoffier - Michael Felt - Michael Fox - Mike Frysinger - Daniel Richard G. + - Bjarni Ingi Gislason - Bill Glessner - Jason Gorski - Juan Manuel Guerrero - Diederik de Haas - Joachim Henke - Christian Hesse - Vincenzo Innocente - Peter Ivanov - Jouk Jansen - Jun I Jin - Kiyoshi Kanazawa - Per Øyvind Karlsen - Thomas Klausner - Richard Koch - Ville Koskinen - Jan Kratochvil - Christian Kujau - Stephan Kulow - Peter Lawler - James M Leddy - Hin-Tak Leung - Andraž 'ruskie' Levstik - Cary Lewis - Wim Lewis - Xin Li - Eric Lindblad - Lorenzo De Liso + - H.J. Lu - Bela Lubkin - Gregory Margo - Julien Marrec + - Ed Maste - Martin Matuška + - Ivan A. Melnikov - Jim Meyering - Arkadiusz Miskiewicz - Conley Moorhous - Rafał Mużyło - Adrien Nader - Evan Nemerson - Hongbo Ni - Jonathan Nieder - Andre Noll - Peter O'Gorman + - Daniel Packard - Filip Palian - Peter Pallinger - Rui Paulo - Igor Pavlov - Diego Elio Pettenò - Elbert Pol - Mikko Pouru - Rich Prohaska - Trần Ngọc Quân - Pavel Raiskup - Ole André Vadla Ravnås + - Eric S. Raymond - Robert Readman - Bernhard Reutner-Fischer - - Eric S. Raymond + - Markus Rickert - Cristian Rodríguez - Christian von Roques + - Boud Roukema - Torsten Rupp + - Stephen Sachs - Jukka Salmi - Alexandre Sauvé - Benno Schulenberg - Andreas Schwab - Bhargava Shastry - Dan Shechter - Stuart Shelton - Sebastian Andrzej Siewior + - Ville Skyttä - Brad Smith - Bruce Stark - Pippijn van Steenhoven - Jonathan Stott - Dan Stromberg + - Jia Tan - Vincent Torri - Paul Townsend - Mohammed Adnène Trojette - Alexey Tourbin - Loganaden Velvindron - Patrick J. Volkerding - Martin Väth - Adam Walling - Jeffrey Walton - Christian Weisgerber + - Dan Weiss - Bert Wesarg - Fredrik Wikstrom - Jim Wilcoxson - Ralf Wildenhues - Charles Wilson - Lars Wirzenius - Pilorz Wojciech - Ryan Young - Andreas Zieringer Also thanks to all the people who have participated in the Tukaani project. I have probably forgot to add some names to the above list. Sorry about that and thanks for your help. diff --git a/contrib/xz/src/common/mythread.h b/contrib/xz/src/common/mythread.h index be22654240aa..413821836020 100644 --- a/contrib/xz/src/common/mythread.h +++ b/contrib/xz/src/common/mythread.h @@ -1,521 +1,522 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file mythread.h /// \brief Some threading related helper macros and functions // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #ifndef MYTHREAD_H #define MYTHREAD_H #include "sysdefs.h" // If any type of threading is enabled, #define MYTHREAD_ENABLED. #if defined(MYTHREAD_POSIX) || defined(MYTHREAD_WIN95) \ || defined(MYTHREAD_VISTA) # define MYTHREAD_ENABLED 1 #endif #ifdef MYTHREAD_ENABLED //////////////////////////////////////// // Shared between all threading types // //////////////////////////////////////// // Locks a mutex for a duration of a block. // // Perform mythread_mutex_lock(&mutex) in the beginning of a block // and mythread_mutex_unlock(&mutex) at the end of the block. "break" // may be used to unlock the mutex and jump out of the block. // mythread_sync blocks may be nested. // // Example: // // mythread_sync(mutex) { // foo(); // if (some_error) // break; // Skips bar() // bar(); // } // // At least GCC optimizes the loops completely away so it doesn't slow // things down at all compared to plain mythread_mutex_lock(&mutex) // and mythread_mutex_unlock(&mutex) calls. // #define mythread_sync(mutex) mythread_sync_helper1(mutex, __LINE__) #define mythread_sync_helper1(mutex, line) mythread_sync_helper2(mutex, line) #define mythread_sync_helper2(mutex, line) \ for (unsigned int mythread_i_ ## line = 0; \ mythread_i_ ## line \ ? (mythread_mutex_unlock(&(mutex)), 0) \ : (mythread_mutex_lock(&(mutex)), 1); \ mythread_i_ ## line = 1) \ for (unsigned int mythread_j_ ## line = 0; \ !mythread_j_ ## line; \ mythread_j_ ## line = 1) #endif #if !defined(MYTHREAD_ENABLED) ////////////////// // No threading // ////////////////// // Calls the given function once. This isn't thread safe. #define mythread_once(func) \ do { \ static bool once_ = false; \ if (!once_) { \ func(); \ once_ = true; \ } \ } while (0) #if !(defined(_WIN32) && !defined(__CYGWIN__)) // Use sigprocmask() to set the signal mask in single-threaded programs. #include static inline void mythread_sigmask(int how, const sigset_t *restrict set, sigset_t *restrict oset) { int ret = sigprocmask(how, set, oset); assert(ret == 0); (void)ret; } #endif #elif defined(MYTHREAD_POSIX) //////////////////// // Using pthreads // //////////////////// #include #include #include #include #include #define MYTHREAD_RET_TYPE void * #define MYTHREAD_RET_VALUE NULL typedef pthread_t mythread; typedef pthread_mutex_t mythread_mutex; typedef struct { pthread_cond_t cond; #ifdef HAVE_CLOCK_GETTIME // Clock ID (CLOCK_REALTIME or CLOCK_MONOTONIC) associated with // the condition variable. clockid_t clk_id; #endif } mythread_cond; typedef struct timespec mythread_condtime; // Calls the given function once in a thread-safe way. #define mythread_once(func) \ do { \ static pthread_once_t once_ = PTHREAD_ONCE_INIT; \ pthread_once(&once_, &func); \ } while (0) // Use pthread_sigmask() to set the signal mask in multi-threaded programs. // Do nothing on OpenVMS since it lacks pthread_sigmask(). static inline void mythread_sigmask(int how, const sigset_t *restrict set, sigset_t *restrict oset) { #ifdef __VMS (void)how; (void)set; (void)oset; #else int ret = pthread_sigmask(how, set, oset); assert(ret == 0); (void)ret; #endif } // Creates a new thread with all signals blocked. Returns zero on success // and non-zero on error. static inline int mythread_create(mythread *thread, void *(*func)(void *arg), void *arg) { sigset_t old; sigset_t all; sigfillset(&all); mythread_sigmask(SIG_SETMASK, &all, &old); const int ret = pthread_create(thread, NULL, func, arg); mythread_sigmask(SIG_SETMASK, &old, NULL); return ret; } // Joins a thread. Returns zero on success and non-zero on error. static inline int mythread_join(mythread thread) { return pthread_join(thread, NULL); } // Initiatlizes a mutex. Returns zero on success and non-zero on error. static inline int mythread_mutex_init(mythread_mutex *mutex) { return pthread_mutex_init(mutex, NULL); } static inline void mythread_mutex_destroy(mythread_mutex *mutex) { int ret = pthread_mutex_destroy(mutex); assert(ret == 0); (void)ret; } static inline void mythread_mutex_lock(mythread_mutex *mutex) { int ret = pthread_mutex_lock(mutex); assert(ret == 0); (void)ret; } static inline void mythread_mutex_unlock(mythread_mutex *mutex) { int ret = pthread_mutex_unlock(mutex); assert(ret == 0); (void)ret; } // Initializes a condition variable. // // Using CLOCK_MONOTONIC instead of the default CLOCK_REALTIME makes the // timeout in pthread_cond_timedwait() work correctly also if system time // is suddenly changed. Unfortunately CLOCK_MONOTONIC isn't available // everywhere while the default CLOCK_REALTIME is, so the default is // used if CLOCK_MONOTONIC isn't available. // // If clock_gettime() isn't available at all, gettimeofday() will be used. static inline int mythread_cond_init(mythread_cond *mycond) { #ifdef HAVE_CLOCK_GETTIME // NOTE: HAVE_DECL_CLOCK_MONOTONIC is always defined to 0 or 1. # if defined(HAVE_PTHREAD_CONDATTR_SETCLOCK) && HAVE_DECL_CLOCK_MONOTONIC struct timespec ts; pthread_condattr_t condattr; // POSIX doesn't seem to *require* that pthread_condattr_setclock() // will fail if given an unsupported clock ID. Test that // CLOCK_MONOTONIC really is supported using clock_gettime(). if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0 && pthread_condattr_init(&condattr) == 0) { int ret = pthread_condattr_setclock( &condattr, CLOCK_MONOTONIC); if (ret == 0) ret = pthread_cond_init(&mycond->cond, &condattr); pthread_condattr_destroy(&condattr); if (ret == 0) { mycond->clk_id = CLOCK_MONOTONIC; return 0; } } // If anything above fails, fall back to the default CLOCK_REALTIME. // POSIX requires that all implementations of clock_gettime() must // support at least CLOCK_REALTIME. # endif mycond->clk_id = CLOCK_REALTIME; #endif return pthread_cond_init(&mycond->cond, NULL); } static inline void mythread_cond_destroy(mythread_cond *cond) { int ret = pthread_cond_destroy(&cond->cond); assert(ret == 0); (void)ret; } static inline void mythread_cond_signal(mythread_cond *cond) { int ret = pthread_cond_signal(&cond->cond); assert(ret == 0); (void)ret; } static inline void mythread_cond_wait(mythread_cond *cond, mythread_mutex *mutex) { int ret = pthread_cond_wait(&cond->cond, mutex); assert(ret == 0); (void)ret; } // Waits on a condition or until a timeout expires. If the timeout expires, // non-zero is returned, otherwise zero is returned. static inline int mythread_cond_timedwait(mythread_cond *cond, mythread_mutex *mutex, const mythread_condtime *condtime) { int ret = pthread_cond_timedwait(&cond->cond, mutex, condtime); assert(ret == 0 || ret == ETIMEDOUT); return ret; } // Sets condtime to the absolute time that is timeout_ms milliseconds // in the future. The type of the clock to use is taken from cond. static inline void mythread_condtime_set(mythread_condtime *condtime, const mythread_cond *cond, uint32_t timeout_ms) { condtime->tv_sec = timeout_ms / 1000; condtime->tv_nsec = (timeout_ms % 1000) * 1000000; #ifdef HAVE_CLOCK_GETTIME struct timespec now; int ret = clock_gettime(cond->clk_id, &now); assert(ret == 0); (void)ret; condtime->tv_sec += now.tv_sec; condtime->tv_nsec += now.tv_nsec; #else (void)cond; struct timeval now; gettimeofday(&now, NULL); condtime->tv_sec += now.tv_sec; condtime->tv_nsec += now.tv_usec * 1000L; #endif // tv_nsec must stay in the range [0, 999_999_999]. if (condtime->tv_nsec >= 1000000000L) { condtime->tv_nsec -= 1000000000L; ++condtime->tv_sec; } } #elif defined(MYTHREAD_WIN95) || defined(MYTHREAD_VISTA) ///////////////////// // Windows threads // ///////////////////// #define WIN32_LEAN_AND_MEAN #ifdef MYTHREAD_VISTA # undef _WIN32_WINNT # define _WIN32_WINNT 0x0600 #endif #include #include #define MYTHREAD_RET_TYPE unsigned int __stdcall #define MYTHREAD_RET_VALUE 0 typedef HANDLE mythread; typedef CRITICAL_SECTION mythread_mutex; #ifdef MYTHREAD_WIN95 typedef HANDLE mythread_cond; #else typedef CONDITION_VARIABLE mythread_cond; #endif typedef struct { // Tick count (milliseconds) in the beginning of the timeout. // NOTE: This is 32 bits so it wraps around after 49.7 days. // Multi-day timeouts may not work as expected. DWORD start; // Length of the timeout in milliseconds. The timeout expires // when the current tick count minus "start" is equal or greater // than "timeout". DWORD timeout; } mythread_condtime; // mythread_once() is only available with Vista threads. #ifdef MYTHREAD_VISTA #define mythread_once(func) \ do { \ static INIT_ONCE once_ = INIT_ONCE_STATIC_INIT; \ BOOL pending_; \ if (!InitOnceBeginInitialize(&once_, 0, &pending_, NULL)) \ abort(); \ - if (pending_) \ + if (pending_) { \ func(); \ - if (!InitOnceComplete(&once, 0, NULL)) \ - abort(); \ + if (!InitOnceComplete(&once, 0, NULL)) \ + abort(); \ + } \ } while (0) #endif // mythread_sigmask() isn't available on Windows. Even a dummy version would // make no sense because the other POSIX signal functions are missing anyway. static inline int mythread_create(mythread *thread, unsigned int (__stdcall *func)(void *arg), void *arg) { uintptr_t ret = _beginthreadex(NULL, 0, func, arg, 0, NULL); if (ret == 0) return -1; *thread = (HANDLE)ret; return 0; } static inline int mythread_join(mythread thread) { int ret = 0; if (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0) ret = -1; if (!CloseHandle(thread)) ret = -1; return ret; } static inline int mythread_mutex_init(mythread_mutex *mutex) { InitializeCriticalSection(mutex); return 0; } static inline void mythread_mutex_destroy(mythread_mutex *mutex) { DeleteCriticalSection(mutex); } static inline void mythread_mutex_lock(mythread_mutex *mutex) { EnterCriticalSection(mutex); } static inline void mythread_mutex_unlock(mythread_mutex *mutex) { LeaveCriticalSection(mutex); } static inline int mythread_cond_init(mythread_cond *cond) { #ifdef MYTHREAD_WIN95 *cond = CreateEvent(NULL, FALSE, FALSE, NULL); return *cond == NULL ? -1 : 0; #else InitializeConditionVariable(cond); return 0; #endif } static inline void mythread_cond_destroy(mythread_cond *cond) { #ifdef MYTHREAD_WIN95 CloseHandle(*cond); #else (void)cond; #endif } static inline void mythread_cond_signal(mythread_cond *cond) { #ifdef MYTHREAD_WIN95 SetEvent(*cond); #else WakeConditionVariable(cond); #endif } static inline void mythread_cond_wait(mythread_cond *cond, mythread_mutex *mutex) { #ifdef MYTHREAD_WIN95 LeaveCriticalSection(mutex); WaitForSingleObject(*cond, INFINITE); EnterCriticalSection(mutex); #else BOOL ret = SleepConditionVariableCS(cond, mutex, INFINITE); assert(ret); (void)ret; #endif } static inline int mythread_cond_timedwait(mythread_cond *cond, mythread_mutex *mutex, const mythread_condtime *condtime) { #ifdef MYTHREAD_WIN95 LeaveCriticalSection(mutex); #endif DWORD elapsed = GetTickCount() - condtime->start; DWORD timeout = elapsed >= condtime->timeout ? 0 : condtime->timeout - elapsed; #ifdef MYTHREAD_WIN95 DWORD ret = WaitForSingleObject(*cond, timeout); assert(ret == WAIT_OBJECT_0 || ret == WAIT_TIMEOUT); EnterCriticalSection(mutex); return ret == WAIT_TIMEOUT; #else BOOL ret = SleepConditionVariableCS(cond, mutex, timeout); assert(ret || GetLastError() == ERROR_TIMEOUT); return !ret; #endif } static inline void mythread_condtime_set(mythread_condtime *condtime, const mythread_cond *cond, uint32_t timeout) { (void)cond; condtime->start = GetTickCount(); condtime->timeout = timeout; } #endif #endif diff --git a/contrib/xz/src/common/tuklib_cpucores.c b/contrib/xz/src/common/tuklib_cpucores.c index cc968dd25efb..bb3f2f752b1b 100644 --- a/contrib/xz/src/common/tuklib_cpucores.c +++ b/contrib/xz/src/common/tuklib_cpucores.c @@ -1,100 +1,109 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file tuklib_cpucores.c /// \brief Get the number of CPU cores online // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "tuklib_cpucores.h" #if defined(_WIN32) || defined(__CYGWIN__) # ifndef _WIN32_WINNT # define _WIN32_WINNT 0x0500 # endif # include // glibc >= 2.9 #elif defined(TUKLIB_CPUCORES_SCHED_GETAFFINITY) # include // FreeBSD #elif defined(TUKLIB_CPUCORES_CPUSET) # include # include #elif defined(TUKLIB_CPUCORES_SYSCTL) # ifdef HAVE_SYS_PARAM_H # include # endif # include #elif defined(TUKLIB_CPUCORES_SYSCONF) # include // HP-UX #elif defined(TUKLIB_CPUCORES_PSTAT_GETDYNAMIC) # include # include #endif extern uint32_t tuklib_cpucores(void) { uint32_t ret = 0; #if defined(_WIN32) || defined(__CYGWIN__) SYSTEM_INFO sysinfo; GetSystemInfo(&sysinfo); ret = sysinfo.dwNumberOfProcessors; #elif defined(TUKLIB_CPUCORES_SCHED_GETAFFINITY) cpu_set_t cpu_mask; if (sched_getaffinity(0, sizeof(cpu_mask), &cpu_mask) == 0) ret = (uint32_t)CPU_COUNT(&cpu_mask); #elif defined(TUKLIB_CPUCORES_CPUSET) cpuset_t set; if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(set), &set) == 0) { # ifdef CPU_COUNT ret = (uint32_t)CPU_COUNT(&set); # else for (unsigned i = 0; i < CPU_SETSIZE; ++i) if (CPU_ISSET(i, &set)) ++ret; # endif } #elif defined(TUKLIB_CPUCORES_SYSCTL) + // On OpenBSD HW_NCPUONLINE tells the number of processor cores that + // are online so it is preferred over HW_NCPU which also counts cores + // that aren't currently available. The number of cores online is + // often less than HW_NCPU because OpenBSD disables simultaneous + // multi-threading (SMT) by default. +# ifdef HW_NCPUONLINE + int name[2] = { CTL_HW, HW_NCPUONLINE }; +# else int name[2] = { CTL_HW, HW_NCPU }; +# endif int cpus; size_t cpus_size = sizeof(cpus); if (sysctl(name, 2, &cpus, &cpus_size, NULL, 0) != -1 && cpus_size == sizeof(cpus) && cpus > 0) ret = (uint32_t)cpus; #elif defined(TUKLIB_CPUCORES_SYSCONF) # ifdef _SC_NPROCESSORS_ONLN // Most systems const long cpus = sysconf(_SC_NPROCESSORS_ONLN); # else // IRIX const long cpus = sysconf(_SC_NPROC_ONLN); # endif if (cpus > 0) ret = (uint32_t)cpus; #elif defined(TUKLIB_CPUCORES_PSTAT_GETDYNAMIC) struct pst_dynamic pst; if (pstat_getdynamic(&pst, sizeof(pst), 1, 0) != -1) ret = (uint32_t)pst.psd_proc_cnt; #endif return ret; } diff --git a/contrib/xz/src/common/tuklib_physmem.c b/contrib/xz/src/common/tuklib_physmem.c index 4053ad006a64..a1bccb2f6753 100644 --- a/contrib/xz/src/common/tuklib_physmem.c +++ b/contrib/xz/src/common/tuklib_physmem.c @@ -1,216 +1,216 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file tuklib_physmem.c /// \brief Get the amount of physical memory // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "tuklib_physmem.h" // We want to use Windows-specific code on Cygwin, which also has memory // information available via sysconf(), but on Cygwin 1.5 and older it // gives wrong results (from our point of view). #if defined(_WIN32) || defined(__CYGWIN__) # ifndef _WIN32_WINNT # define _WIN32_WINNT 0x0500 # endif # include #elif defined(__OS2__) # define INCL_DOSMISC # include #elif defined(__DJGPP__) # include #elif defined(__VMS) # include # include # include #elif defined(AMIGA) || defined(__AROS__) # define __USE_INLINE__ # include #elif defined(__QNX__) # include # include #elif defined(TUKLIB_PHYSMEM_AIX) # include #elif defined(TUKLIB_PHYSMEM_SYSCONF) # include #elif defined(TUKLIB_PHYSMEM_SYSCTL) # ifdef HAVE_SYS_PARAM_H # include # endif # include // Tru64 #elif defined(TUKLIB_PHYSMEM_GETSYSINFO) # include # include // HP-UX #elif defined(TUKLIB_PHYSMEM_PSTAT_GETSTATIC) # include # include // IRIX #elif defined(TUKLIB_PHYSMEM_GETINVENT_R) # include // This sysinfo() is Linux-specific. #elif defined(TUKLIB_PHYSMEM_SYSINFO) # include #endif extern uint64_t tuklib_physmem(void) { uint64_t ret = 0; #if defined(_WIN32) || defined(__CYGWIN__) if ((GetVersion() & 0xFF) >= 5) { // Windows 2000 and later have GlobalMemoryStatusEx() which // supports reporting values greater than 4 GiB. To keep the // code working also on older Windows versions, use // GlobalMemoryStatusEx() conditionally. - HMODULE kernel32 = GetModuleHandle("kernel32.dll"); + HMODULE kernel32 = GetModuleHandle(TEXT("kernel32.dll")); if (kernel32 != NULL) { typedef BOOL (WINAPI *gmse_type)(LPMEMORYSTATUSEX); gmse_type gmse = (gmse_type)GetProcAddress( kernel32, "GlobalMemoryStatusEx"); if (gmse != NULL) { MEMORYSTATUSEX meminfo; meminfo.dwLength = sizeof(meminfo); if (gmse(&meminfo)) ret = meminfo.ullTotalPhys; } } } if (ret == 0) { // GlobalMemoryStatus() is supported by Windows 95 and later, // so it is fine to link against it unconditionally. Note that // GlobalMemoryStatus() has no return value. MEMORYSTATUS meminfo; meminfo.dwLength = sizeof(meminfo); GlobalMemoryStatus(&meminfo); ret = meminfo.dwTotalPhys; } #elif defined(__OS2__) unsigned long mem; if (DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, &mem, sizeof(mem)) == 0) ret = mem; #elif defined(__DJGPP__) __dpmi_free_mem_info meminfo; if (__dpmi_get_free_memory_information(&meminfo) == 0 && meminfo.total_number_of_physical_pages != (unsigned long)-1) ret = (uint64_t)meminfo.total_number_of_physical_pages * 4096; #elif defined(__VMS) int vms_mem; int val = SYI$_MEMSIZE; if (LIB$GETSYI(&val, &vms_mem, 0, 0, 0, 0) == SS$_NORMAL) ret = (uint64_t)vms_mem * 8192; #elif defined(AMIGA) || defined(__AROS__) ret = AvailMem(MEMF_TOTAL); #elif defined(__QNX__) const struct asinfo_entry *entries = SYSPAGE_ENTRY(asinfo); size_t count = SYSPAGE_ENTRY_SIZE(asinfo) / sizeof(struct asinfo_entry); const char *strings = SYSPAGE_ENTRY(strings)->data; for (size_t i = 0; i < count; ++i) if (strcmp(strings + entries[i].name, "ram") == 0) ret += entries[i].end - entries[i].start + 1; #elif defined(TUKLIB_PHYSMEM_AIX) ret = _system_configuration.physmem; #elif defined(TUKLIB_PHYSMEM_SYSCONF) const long pagesize = sysconf(_SC_PAGESIZE); const long pages = sysconf(_SC_PHYS_PAGES); if (pagesize != -1 && pages != -1) // According to docs, pagesize * pages can overflow. // Simple case is 32-bit box with 4 GiB or more RAM, // which may report exactly 4 GiB of RAM, and "long" // being 32-bit will overflow. Casting to uint64_t // hopefully avoids overflows in the near future. ret = (uint64_t)pagesize * (uint64_t)pages; #elif defined(TUKLIB_PHYSMEM_SYSCTL) int name[2] = { CTL_HW, #ifdef HW_PHYSMEM64 HW_PHYSMEM64 #else HW_PHYSMEM #endif }; union { uint32_t u32; uint64_t u64; } mem; size_t mem_ptr_size = sizeof(mem.u64); if (sysctl(name, 2, &mem.u64, &mem_ptr_size, NULL, 0) != -1) { // IIRC, 64-bit "return value" is possible on some 64-bit // BSD systems even with HW_PHYSMEM (instead of HW_PHYSMEM64), // so support both. if (mem_ptr_size == sizeof(mem.u64)) ret = mem.u64; else if (mem_ptr_size == sizeof(mem.u32)) ret = mem.u32; } #elif defined(TUKLIB_PHYSMEM_GETSYSINFO) // Docs are unclear if "start" is needed, but it doesn't hurt // much to have it. int memkb; int start = 0; if (getsysinfo(GSI_PHYSMEM, (caddr_t)&memkb, sizeof(memkb), &start) != -1) ret = (uint64_t)memkb * 1024; #elif defined(TUKLIB_PHYSMEM_PSTAT_GETSTATIC) struct pst_static pst; if (pstat_getstatic(&pst, sizeof(pst), 1, 0) != -1) ret = (uint64_t)pst.physical_memory * (uint64_t)pst.page_size; #elif defined(TUKLIB_PHYSMEM_GETINVENT_R) inv_state_t *st = NULL; if (setinvent_r(&st) != -1) { inventory_t *i; while ((i = getinvent_r(st)) != NULL) { if (i->inv_class == INV_MEMORY && i->inv_type == INV_MAIN_MB) { ret = (uint64_t)i->inv_state << 20; break; } } endinvent_r(st); } #elif defined(TUKLIB_PHYSMEM_SYSINFO) struct sysinfo si; if (sysinfo(&si) == 0) ret = (uint64_t)si.totalram * si.mem_unit; #endif return ret; } diff --git a/contrib/xz/src/liblzma/api/lzma.h b/contrib/xz/src/liblzma/api/lzma.h index 122dab80d357..8fbd9a874b32 100644 --- a/contrib/xz/src/liblzma/api/lzma.h +++ b/contrib/xz/src/liblzma/api/lzma.h @@ -1,326 +1,327 @@ /** * \file api/lzma.h * \brief The public API of liblzma data compression library * * liblzma is a public domain general-purpose data compression library with * a zlib-like API. The native file format is .xz, but also the old .lzma * format and raw (no headers) streams are supported. Multiple compression * algorithms (filters) are supported. Currently LZMA2 is the primary filter. * * liblzma is part of XZ Utils . XZ Utils includes * a gzip-like command line tool named xz and some other tools. XZ Utils * is developed and maintained by Lasse Collin. * * Major parts of liblzma are based on Igor Pavlov's public domain LZMA SDK * . * * The SHA-256 implementation is based on the public domain code found from * 7-Zip , which has a modified version of the public * domain SHA-256 code found from Crypto++ . * The SHA-256 code in Crypto++ was written by Kevin Springle and Wei Dai. */ /* * Author: Lasse Collin * * This file has been put into the public domain. * You can do whatever you want with this file. */ #ifndef LZMA_H #define LZMA_H /***************************** * Required standard headers * *****************************/ /* * liblzma API headers need some standard types and macros. To allow * including lzma.h without requiring the application to include other * headers first, lzma.h includes the required standard headers unless * they already seem to be included already or if LZMA_MANUAL_HEADERS * has been defined. * * Here's what types and macros are needed and from which headers: * - stddef.h: size_t, NULL * - stdint.h: uint8_t, uint32_t, uint64_t, UINT32_C(n), uint64_C(n), * UINT32_MAX, UINT64_MAX * * However, inttypes.h is a little more portable than stdint.h, although * inttypes.h declares some unneeded things compared to plain stdint.h. * * The hacks below aren't perfect, specifically they assume that inttypes.h * exists and that it typedefs at least uint8_t, uint32_t, and uint64_t, * and that, in case of incomplete inttypes.h, unsigned int is 32-bit. * If the application already takes care of setting up all the types and * macros properly (for example by using gnulib's stdint.h or inttypes.h), * we try to detect that the macros are already defined and don't include * inttypes.h here again. However, you may define LZMA_MANUAL_HEADERS to * force this file to never include any system headers. * * Some could argue that liblzma API should provide all the required types, * for example lzma_uint64, LZMA_UINT64_C(n), and LZMA_UINT64_MAX. This was * seen as an unnecessary mess, since most systems already provide all the * necessary types and macros in the standard headers. * * Note that liblzma API still has lzma_bool, because using stdbool.h would * break C89 and C++ programs on many systems. sizeof(bool) in C99 isn't * necessarily the same as sizeof(bool) in C++. */ #ifndef LZMA_MANUAL_HEADERS /* * I suppose this works portably also in C++. Note that in C++, * we need to get size_t into the global namespace. */ # include /* * Skip inttypes.h if we already have all the required macros. If we * have the macros, we assume that we have the matching typedefs too. */ # if !defined(UINT32_C) || !defined(UINT64_C) \ || !defined(UINT32_MAX) || !defined(UINT64_MAX) /* * MSVC versions older than 2013 have no C99 support, and * thus they cannot be used to compile liblzma. Using an * existing liblzma.dll with old MSVC can work though(*), * but we need to define the required standard integer * types here in a MSVC-specific way. * * (*) If you do this, the existing liblzma.dll probably uses * a different runtime library than your MSVC-built * application. Mixing runtimes is generally bad, but * in this case it should work as long as you avoid * the few rarely-needed liblzma functions that allocate * memory and expect the caller to free it using free(). */ # if defined(_WIN32) && defined(_MSC_VER) && _MSC_VER < 1800 typedef unsigned __int8 uint8_t; typedef unsigned __int32 uint32_t; typedef unsigned __int64 uint64_t; # else /* Use the standard inttypes.h. */ # ifdef __cplusplus /* * C99 sections 7.18.2 and 7.18.4 specify * that C++ implementations define the limit * and constant macros only if specifically * requested. Note that if you want the * format macros (PRIu64 etc.) too, you need * to define __STDC_FORMAT_MACROS before * including lzma.h, since re-including * inttypes.h with __STDC_FORMAT_MACROS * defined doesn't necessarily work. */ # ifndef __STDC_LIMIT_MACROS # define __STDC_LIMIT_MACROS 1 # endif # ifndef __STDC_CONSTANT_MACROS # define __STDC_CONSTANT_MACROS 1 # endif # endif # include # endif /* * Some old systems have only the typedefs in inttypes.h, and * lack all the macros. For those systems, we need a few more * hacks. We assume that unsigned int is 32-bit and unsigned * long is either 32-bit or 64-bit. If these hacks aren't * enough, the application has to setup the types manually * before including lzma.h. */ # ifndef UINT32_C # if defined(_WIN32) && defined(_MSC_VER) # define UINT32_C(n) n ## UI32 # else # define UINT32_C(n) n ## U # endif # endif # ifndef UINT64_C # if defined(_WIN32) && defined(_MSC_VER) # define UINT64_C(n) n ## UI64 # else /* Get ULONG_MAX. */ # include # if ULONG_MAX == 4294967295UL # define UINT64_C(n) n ## ULL # else # define UINT64_C(n) n ## UL # endif # endif # endif # ifndef UINT32_MAX # define UINT32_MAX (UINT32_C(4294967295)) # endif # ifndef UINT64_MAX # define UINT64_MAX (UINT64_C(18446744073709551615)) # endif # endif #endif /* ifdef LZMA_MANUAL_HEADERS */ /****************** * LZMA_API macro * ******************/ /* * Some systems require that the functions and function pointers are * declared specially in the headers. LZMA_API_IMPORT is for importing * symbols and LZMA_API_CALL is to specify the calling convention. * * By default it is assumed that the application will link dynamically * against liblzma. #define LZMA_API_STATIC in your application if you * want to link against static liblzma. If you don't care about portability * to operating systems like Windows, or at least don't care about linking * against static liblzma on them, don't worry about LZMA_API_STATIC. That * is, most developers will never need to use LZMA_API_STATIC. * * The GCC variants are a special case on Windows (Cygwin and MinGW). * We rely on GCC doing the right thing with its auto-import feature, * and thus don't use __declspec(dllimport). This way developers don't * need to worry about LZMA_API_STATIC. Also the calling convention is * omitted on Cygwin but not on MinGW. */ #ifndef LZMA_API_IMPORT # if !defined(LZMA_API_STATIC) && defined(_WIN32) && !defined(__GNUC__) # define LZMA_API_IMPORT __declspec(dllimport) # else # define LZMA_API_IMPORT # endif #endif #ifndef LZMA_API_CALL # if defined(_WIN32) && !defined(__CYGWIN__) # define LZMA_API_CALL __cdecl # else # define LZMA_API_CALL # endif #endif #ifndef LZMA_API # define LZMA_API(type) LZMA_API_IMPORT type LZMA_API_CALL #endif /*********** * nothrow * ***********/ /* * None of the functions in liblzma may throw an exception. Even * the functions that use callback functions won't throw exceptions, * because liblzma would break if a callback function threw an exception. */ #ifndef lzma_nothrow # if defined(__cplusplus) -# if __cplusplus >= 201103L +# if __cplusplus >= 201103L || (defined(_MSVC_LANG) \ + && _MSVC_LANG >= 201103L) # define lzma_nothrow noexcept # else # define lzma_nothrow throw() # endif # elif defined(__GNUC__) && (__GNUC__ > 3 \ || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) # define lzma_nothrow __attribute__((__nothrow__)) # else # define lzma_nothrow # endif #endif /******************** * GNU C extensions * ********************/ /* * GNU C extensions are used conditionally in the public API. It doesn't * break anything if these are sometimes enabled and sometimes not, only * affects warnings and optimizations. */ #if defined(__GNUC__) && __GNUC__ >= 3 # ifndef lzma_attribute # define lzma_attribute(attr) __attribute__(attr) # endif /* warn_unused_result was added in GCC 3.4. */ # ifndef lzma_attr_warn_unused_result # if __GNUC__ == 3 && __GNUC_MINOR__ < 4 # define lzma_attr_warn_unused_result # endif # endif #else # ifndef lzma_attribute # define lzma_attribute(attr) # endif #endif #ifndef lzma_attr_pure # define lzma_attr_pure lzma_attribute((__pure__)) #endif #ifndef lzma_attr_const # define lzma_attr_const lzma_attribute((__const__)) #endif #ifndef lzma_attr_warn_unused_result # define lzma_attr_warn_unused_result \ lzma_attribute((__warn_unused_result__)) #endif /************** * Subheaders * **************/ #ifdef __cplusplus extern "C" { #endif /* * Subheaders check that this is defined. It is to prevent including * them directly from applications. */ #define LZMA_H_INTERNAL 1 /* Basic features */ #include "lzma/version.h" #include "lzma/base.h" #include "lzma/vli.h" #include "lzma/check.h" /* Filters */ #include "lzma/filter.h" #include "lzma/bcj.h" #include "lzma/delta.h" #include "lzma/lzma12.h" /* Container formats */ #include "lzma/container.h" /* Advanced features */ #include "lzma/stream_flags.h" #include "lzma/block.h" #include "lzma/index.h" #include "lzma/index_hash.h" /* Hardware information */ #include "lzma/hardware.h" /* * All subheaders included. Undefine LZMA_H_INTERNAL to prevent applications * re-including the subheaders. */ #undef LZMA_H_INTERNAL #ifdef __cplusplus } #endif #endif /* ifndef LZMA_H */ diff --git a/contrib/xz/src/liblzma/api/lzma/base.h b/contrib/xz/src/liblzma/api/lzma/base.h index a6005accc93d..51421353ca1d 100644 --- a/contrib/xz/src/liblzma/api/lzma/base.h +++ b/contrib/xz/src/liblzma/api/lzma/base.h @@ -1,659 +1,670 @@ /** * \file lzma/base.h * \brief Data types and functions used in many places in liblzma API */ /* * Author: Lasse Collin * * This file has been put into the public domain. * You can do whatever you want with this file. * * See ../lzma.h for information about liblzma as a whole. */ #ifndef LZMA_H_INTERNAL # error Never include this file directly. Use instead. #endif /** * \brief Boolean * * This is here because C89 doesn't have stdbool.h. To set a value for * variables having type lzma_bool, you can use * - C99's `true' and `false' from stdbool.h; * - C++'s internal `true' and `false'; or * - integers one (true) and zero (false). */ typedef unsigned char lzma_bool; /** * \brief Type of reserved enumeration variable in structures * * To avoid breaking library ABI when new features are added, several * structures contain extra variables that may be used in future. Since * sizeof(enum) can be different than sizeof(int), and sizeof(enum) may * even vary depending on the range of enumeration constants, we specify * a separate type to be used for reserved enumeration variables. All * enumeration constants in liblzma API will be non-negative and less * than 128, which should guarantee that the ABI won't break even when * new constants are added to existing enumerations. */ typedef enum { LZMA_RESERVED_ENUM = 0 } lzma_reserved_enum; /** * \brief Return values used by several functions in liblzma * * Check the descriptions of specific functions to find out which return * values they can return. With some functions the return values may have * more specific meanings than described here; those differences are * described per-function basis. */ typedef enum { LZMA_OK = 0, /**< * \brief Operation completed successfully */ LZMA_STREAM_END = 1, /**< * \brief End of stream was reached * * In encoder, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or * LZMA_FINISH was finished. In decoder, this indicates * that all the data was successfully decoded. * * In all cases, when LZMA_STREAM_END is returned, the last * output bytes should be picked from strm->next_out. */ LZMA_NO_CHECK = 2, /**< * \brief Input stream has no integrity check * * This return value can be returned only if the * LZMA_TELL_NO_CHECK flag was used when initializing * the decoder. LZMA_NO_CHECK is just a warning, and * the decoding can be continued normally. * * It is possible to call lzma_get_check() immediately after * lzma_code has returned LZMA_NO_CHECK. The result will * naturally be LZMA_CHECK_NONE, but the possibility to call * lzma_get_check() may be convenient in some applications. */ LZMA_UNSUPPORTED_CHECK = 3, /**< * \brief Cannot calculate the integrity check * * The usage of this return value is different in encoders * and decoders. * * Encoders can return this value only from the initialization * function. If initialization fails with this value, the * encoding cannot be done, because there's no way to produce * output with the correct integrity check. * * Decoders can return this value only from lzma_code() and * only if the LZMA_TELL_UNSUPPORTED_CHECK flag was used when * initializing the decoder. The decoding can still be * continued normally even if the check type is unsupported, * but naturally the check will not be validated, and possible * errors may go undetected. * * With decoder, it is possible to call lzma_get_check() * immediately after lzma_code() has returned * LZMA_UNSUPPORTED_CHECK. This way it is possible to find * out what the unsupported Check ID was. */ LZMA_GET_CHECK = 4, /**< * \brief Integrity check type is now available * * This value can be returned only by the lzma_code() function * and only if the decoder was initialized with the * LZMA_TELL_ANY_CHECK flag. LZMA_GET_CHECK tells the * application that it may now call lzma_get_check() to find * out the Check ID. This can be used, for example, to * implement a decoder that accepts only files that have * strong enough integrity check. */ LZMA_MEM_ERROR = 5, /**< * \brief Cannot allocate memory * * Memory allocation failed, or the size of the allocation * would be greater than SIZE_MAX. * * Due to internal implementation reasons, the coding cannot * be continued even if more memory were made available after * LZMA_MEM_ERROR. */ LZMA_MEMLIMIT_ERROR = 6, /** * \brief Memory usage limit was reached * * Decoder would need more memory than allowed by the * specified memory usage limit. To continue decoding, * the memory usage limit has to be increased with * lzma_memlimit_set(). + * + * liblzma 5.2.6 and earlier had a bug in single-threaded .xz + * decoder (lzma_stream_decoder()) which made it impossible + * to continue decoding after LZMA_MEMLIMIT_ERROR even if + * the limit was increased using lzma_memlimit_set(). + * Other decoders worked correctly. */ LZMA_FORMAT_ERROR = 7, /**< * \brief File format not recognized * * The decoder did not recognize the input as supported file * format. This error can occur, for example, when trying to * decode .lzma format file with lzma_stream_decoder, * because lzma_stream_decoder accepts only the .xz format. */ LZMA_OPTIONS_ERROR = 8, /**< * \brief Invalid or unsupported options * * Invalid or unsupported options, for example * - unsupported filter(s) or filter options; or * - reserved bits set in headers (decoder only). * * Rebuilding liblzma with more features enabled, or * upgrading to a newer version of liblzma may help. */ LZMA_DATA_ERROR = 9, /**< * \brief Data is corrupt * * The usage of this return value is different in encoders * and decoders. In both encoder and decoder, the coding * cannot continue after this error. * * Encoders return this if size limits of the target file * format would be exceeded. These limits are huge, thus * getting this error from an encoder is mostly theoretical. * For example, the maximum compressed and uncompressed * size of a .xz Stream is roughly 8 EiB (2^63 bytes). * * Decoders return this error if the input data is corrupt. * This can mean, for example, invalid CRC32 in headers * or invalid check of uncompressed data. */ LZMA_BUF_ERROR = 10, /**< * \brief No progress is possible * * This error code is returned when the coder cannot consume * any new input and produce any new output. The most common * reason for this error is that the input stream being * decoded is truncated or corrupt. * * This error is not fatal. Coding can be continued normally * by providing more input and/or more output space, if * possible. * * Typically the first call to lzma_code() that can do no * progress returns LZMA_OK instead of LZMA_BUF_ERROR. Only * the second consecutive call doing no progress will return * LZMA_BUF_ERROR. This is intentional. * * With zlib, Z_BUF_ERROR may be returned even if the * application is doing nothing wrong, so apps will need * to handle Z_BUF_ERROR specially. The above hack * guarantees that liblzma never returns LZMA_BUF_ERROR * to properly written applications unless the input file * is truncated or corrupt. This should simplify the * applications a little. */ LZMA_PROG_ERROR = 11, /**< * \brief Programming error * * This indicates that the arguments given to the function are * invalid or the internal state of the decoder is corrupt. * - Function arguments are invalid or the structures * pointed by the argument pointers are invalid * e.g. if strm->next_out has been set to NULL and * strm->avail_out > 0 when calling lzma_code(). * - lzma_* functions have been called in wrong order * e.g. lzma_code() was called right after lzma_end(). * - If errors occur randomly, the reason might be flaky * hardware. * * If you think that your code is correct, this error code * can be a sign of a bug in liblzma. See the documentation * how to report bugs. */ } lzma_ret; /** * \brief The `action' argument for lzma_code() * * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, LZMA_FULL_BARRIER, * or LZMA_FINISH, the same `action' must is used until lzma_code() returns * LZMA_STREAM_END. Also, the amount of input (that is, strm->avail_in) must * not be modified by the application until lzma_code() returns * LZMA_STREAM_END. Changing the `action' or modifying the amount of input * will make lzma_code() return LZMA_PROG_ERROR. */ typedef enum { LZMA_RUN = 0, /**< * \brief Continue coding * * Encoder: Encode as much input as possible. Some internal * buffering will probably be done (depends on the filter * chain in use), which causes latency: the input used won't * usually be decodeable from the output of the same * lzma_code() call. * * Decoder: Decode as much input as possible and produce as * much output as possible. */ LZMA_SYNC_FLUSH = 1, /**< * \brief Make all the input available at output * * Normally the encoder introduces some latency. * LZMA_SYNC_FLUSH forces all the buffered data to be * available at output without resetting the internal * state of the encoder. This way it is possible to use * compressed stream for example for communication over * network. * * Only some filters support LZMA_SYNC_FLUSH. Trying to use * LZMA_SYNC_FLUSH with filters that don't support it will * make lzma_code() return LZMA_OPTIONS_ERROR. For example, * LZMA1 doesn't support LZMA_SYNC_FLUSH but LZMA2 does. * * Using LZMA_SYNC_FLUSH very often can dramatically reduce * the compression ratio. With some filters (for example, * LZMA2), fine-tuning the compression options may help * mitigate this problem significantly (for example, * match finder with LZMA2). * * Decoders don't support LZMA_SYNC_FLUSH. */ LZMA_FULL_FLUSH = 2, /**< * \brief Finish encoding of the current Block * * All the input data going to the current Block must have * been given to the encoder (the last bytes can still be * pending in *next_in). Call lzma_code() with LZMA_FULL_FLUSH * until it returns LZMA_STREAM_END. Then continue normally * with LZMA_RUN or finish the Stream with LZMA_FINISH. * * This action is currently supported only by Stream encoder * and easy encoder (which uses Stream encoder). If there is * no unfinished Block, no empty Block is created. */ LZMA_FULL_BARRIER = 4, /**< * \brief Finish encoding of the current Block * * This is like LZMA_FULL_FLUSH except that this doesn't * necessarily wait until all the input has been made * available via the output buffer. That is, lzma_code() * might return LZMA_STREAM_END as soon as all the input * has been consumed (avail_in == 0). * * LZMA_FULL_BARRIER is useful with a threaded encoder if * one wants to split the .xz Stream into Blocks at specific * offsets but doesn't care if the output isn't flushed * immediately. Using LZMA_FULL_BARRIER allows keeping * the threads busy while LZMA_FULL_FLUSH would make * lzma_code() wait until all the threads have finished * until more data could be passed to the encoder. * * With a lzma_stream initialized with the single-threaded * lzma_stream_encoder() or lzma_easy_encoder(), * LZMA_FULL_BARRIER is an alias for LZMA_FULL_FLUSH. */ LZMA_FINISH = 3 /**< * \brief Finish the coding operation * * All the input data must have been given to the encoder * (the last bytes can still be pending in next_in). * Call lzma_code() with LZMA_FINISH until it returns * LZMA_STREAM_END. Once LZMA_FINISH has been used, * the amount of input must no longer be changed by * the application. * * When decoding, using LZMA_FINISH is optional unless the * LZMA_CONCATENATED flag was used when the decoder was * initialized. When LZMA_CONCATENATED was not used, the only * effect of LZMA_FINISH is that the amount of input must not * be changed just like in the encoder. */ } lzma_action; /** * \brief Custom functions for memory handling * * A pointer to lzma_allocator may be passed via lzma_stream structure * to liblzma, and some advanced functions take a pointer to lzma_allocator * as a separate function argument. The library will use the functions * specified in lzma_allocator for memory handling instead of the default * malloc() and free(). C++ users should note that the custom memory * handling functions must not throw exceptions. * * Single-threaded mode only: liblzma doesn't make an internal copy of * lzma_allocator. Thus, it is OK to change these function pointers in * the middle of the coding process, but obviously it must be done * carefully to make sure that the replacement `free' can deallocate * memory allocated by the earlier `alloc' function(s). * * Multithreaded mode: liblzma might internally store pointers to the * lzma_allocator given via the lzma_stream structure. The application * must not change the allocator pointer in lzma_stream or the contents * of the pointed lzma_allocator structure until lzma_end() has been used * to free the memory associated with that lzma_stream. The allocation * functions might be called simultaneously from multiple threads, and * thus they must be thread safe. */ typedef struct { /** * \brief Pointer to a custom memory allocation function * * If you don't want a custom allocator, but still want * custom free(), set this to NULL and liblzma will use * the standard malloc(). * * \param opaque lzma_allocator.opaque (see below) * \param nmemb Number of elements like in calloc(). liblzma * will always set nmemb to 1, so it is safe to * ignore nmemb in a custom allocator if you like. * The nmemb argument exists only for * compatibility with zlib and libbzip2. * \param size Size of an element in bytes. * liblzma never sets this to zero. * * \return Pointer to the beginning of a memory block of * `size' bytes, or NULL if allocation fails * for some reason. When allocation fails, functions * of liblzma return LZMA_MEM_ERROR. * * The allocator should not waste time zeroing the allocated buffers. * This is not only about speed, but also memory usage, since the * operating system kernel doesn't necessarily allocate the requested * memory in physical memory until it is actually used. With small * input files, liblzma may actually need only a fraction of the * memory that it requested for allocation. * * \note LZMA_MEM_ERROR is also used when the size of the * allocation would be greater than SIZE_MAX. Thus, * don't assume that the custom allocator must have * returned NULL if some function from liblzma * returns LZMA_MEM_ERROR. */ void *(LZMA_API_CALL *alloc)(void *opaque, size_t nmemb, size_t size); /** * \brief Pointer to a custom memory freeing function * * If you don't want a custom freeing function, but still * want a custom allocator, set this to NULL and liblzma * will use the standard free(). * * \param opaque lzma_allocator.opaque (see below) * \param ptr Pointer returned by lzma_allocator.alloc(), * or when it is set to NULL, a pointer returned * by the standard malloc(). */ void (LZMA_API_CALL *free)(void *opaque, void *ptr); /** * \brief Pointer passed to .alloc() and .free() * * opaque is passed as the first argument to lzma_allocator.alloc() * and lzma_allocator.free(). This intended to ease implementing * custom memory allocation functions for use with liblzma. * * If you don't need this, you should set this to NULL. */ void *opaque; } lzma_allocator; /** * \brief Internal data structure * * The contents of this structure is not visible outside the library. */ typedef struct lzma_internal_s lzma_internal; /** * \brief Passing data to and from liblzma * * The lzma_stream structure is used for * - passing pointers to input and output buffers to liblzma; - * - defining custom memory hander functions; and + * - defining custom memory handler functions; and * - holding a pointer to coder-specific internal data structures. * * Typical usage: * * - After allocating lzma_stream (on stack or with malloc()), it must be * initialized to LZMA_STREAM_INIT (see LZMA_STREAM_INIT for details). * * - Initialize a coder to the lzma_stream, for example by using * lzma_easy_encoder() or lzma_auto_decoder(). Some notes: * - In contrast to zlib, strm->next_in and strm->next_out are * ignored by all initialization functions, thus it is safe * to not initialize them yet. * - The initialization functions always set strm->total_in and * strm->total_out to zero. * - If the initialization function fails, no memory is left allocated * that would require freeing with lzma_end() even if some memory was * associated with the lzma_stream structure when the initialization * function was called. * * - Use lzma_code() to do the actual work. * * - Once the coding has been finished, the existing lzma_stream can be * reused. It is OK to reuse lzma_stream with different initialization * function without calling lzma_end() first. Old allocations are * automatically freed. * * - Finally, use lzma_end() to free the allocated memory. lzma_end() never * frees the lzma_stream structure itself. * * Application may modify the values of total_in and total_out as it wants. * They are updated by liblzma to match the amount of data read and * written but aren't used for anything else except as a possible return * values from lzma_get_progress(). */ typedef struct { const uint8_t *next_in; /**< Pointer to the next input byte. */ size_t avail_in; /**< Number of available input bytes in next_in. */ uint64_t total_in; /**< Total number of bytes read by liblzma. */ uint8_t *next_out; /**< Pointer to the next output position. */ size_t avail_out; /**< Amount of free space in next_out. */ uint64_t total_out; /**< Total number of bytes written by liblzma. */ /** * \brief Custom memory allocation functions * * In most cases this is NULL which makes liblzma use * the standard malloc() and free(). * * \note In 5.0.x this is not a const pointer. */ const lzma_allocator *allocator; /** Internal state is not visible to applications. */ lzma_internal *internal; /* * Reserved space to allow possible future extensions without * breaking the ABI. Excluding the initialization of this structure, * you should not touch these, because the names of these variables * may change. */ void *reserved_ptr1; void *reserved_ptr2; void *reserved_ptr3; void *reserved_ptr4; uint64_t reserved_int1; uint64_t reserved_int2; size_t reserved_int3; size_t reserved_int4; lzma_reserved_enum reserved_enum1; lzma_reserved_enum reserved_enum2; } lzma_stream; /** * \brief Initialization for lzma_stream * * When you declare an instance of lzma_stream, you can immediately * initialize it so that initialization functions know that no memory * has been allocated yet: * * lzma_stream strm = LZMA_STREAM_INIT; * * If you need to initialize a dynamically allocated lzma_stream, you can use * memset(strm_pointer, 0, sizeof(lzma_stream)). Strictly speaking, this * violates the C standard since NULL may have different internal * representation than zero, but it should be portable enough in practice. * Anyway, for maximum portability, you can use something like this: * * lzma_stream tmp = LZMA_STREAM_INIT; * *strm = tmp; */ #define LZMA_STREAM_INIT \ { NULL, 0, 0, NULL, 0, 0, NULL, NULL, \ NULL, NULL, NULL, NULL, 0, 0, 0, 0, \ LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM } /** * \brief Encode or decode data * * Once the lzma_stream has been successfully initialized (e.g. with * lzma_stream_encoder()), the actual encoding or decoding is done * using this function. The application has to update strm->next_in, * strm->avail_in, strm->next_out, and strm->avail_out to pass input * to and get output from liblzma. * * See the description of the coder-specific initialization function to find * out what `action' values are supported by the coder. */ extern LZMA_API(lzma_ret) lzma_code(lzma_stream *strm, lzma_action action) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Free memory allocated for the coder data structures * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. * * After lzma_end(strm), strm->internal is guaranteed to be NULL. No other * members of the lzma_stream structure are touched. * * \note zlib indicates an error if application end()s unfinished * stream structure. liblzma doesn't do this, and assumes that * application knows what it is doing. */ extern LZMA_API(void) lzma_end(lzma_stream *strm) lzma_nothrow; /** * \brief Get progress information * * In single-threaded mode, applications can get progress information from * strm->total_in and strm->total_out. In multi-threaded mode this is less * useful because a significant amount of both input and output data gets * buffered internally by liblzma. This makes total_in and total_out give * misleading information and also makes the progress indicator updates * non-smooth. * * This function gives realistic progress information also in multi-threaded * mode by taking into account the progress made by each thread. In * single-threaded mode *progress_in and *progress_out are set to * strm->total_in and strm->total_out, respectively. */ extern LZMA_API(void) lzma_get_progress(lzma_stream *strm, uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow; /** * \brief Get the memory usage of decoder filter chain * * This function is currently supported only when *strm has been initialized * with a function that takes a memlimit argument. With other functions, you * should use e.g. lzma_raw_encoder_memusage() or lzma_raw_decoder_memusage() * to estimate the memory requirements. * * This function is useful e.g. after LZMA_MEMLIMIT_ERROR to find out how big * the memory usage limit should have been to decode the input. Note that * this may give misleading information if decoding .xz Streams that have * multiple Blocks, because each Block can have different memory requirements. * * \return How much memory is currently allocated for the filter * decoders. If no filter chain is currently allocated, * some non-zero value is still returned, which is less than * or equal to what any filter chain would indicate as its * memory requirement. * * If this function isn't supported by *strm or some other error * occurs, zero is returned. */ extern LZMA_API(uint64_t) lzma_memusage(const lzma_stream *strm) lzma_nothrow lzma_attr_pure; /** * \brief Get the current memory usage limit * * This function is supported only when *strm has been initialized with * a function that takes a memlimit argument. * * \return On success, the current memory usage limit is returned * (always non-zero). On error, zero is returned. */ extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm) lzma_nothrow lzma_attr_pure; /** * \brief Set the memory usage limit * * This function is supported only when *strm has been initialized with * a function that takes a memlimit argument. * * liblzma 5.2.3 and earlier has a bug where memlimit value of 0 causes * this function to do nothing (leaving the limit unchanged) and still * return LZMA_OK. Later versions treat 0 as if 1 had been specified (so * lzma_memlimit_get() will return 1 even if you specify 0 here). * + * liblzma 5.2.6 and earlier had a bug in single-threaded .xz decoder + * (lzma_stream_decoder()) which made it impossible to continue decoding + * after LZMA_MEMLIMIT_ERROR even if the limit was increased using + * lzma_memlimit_set(). Other decoders worked correctly. + * * \return - LZMA_OK: New memory usage limit successfully set. * - LZMA_MEMLIMIT_ERROR: The new limit is too small. * The limit was not changed. * - LZMA_PROG_ERROR: Invalid arguments, e.g. *strm doesn't * support memory usage limit. */ extern LZMA_API(lzma_ret) lzma_memlimit_set( lzma_stream *strm, uint64_t memlimit) lzma_nothrow; diff --git a/contrib/xz/src/liblzma/api/lzma/block.h b/contrib/xz/src/liblzma/api/lzma/block.h index 962f38779cc7..082e55833e70 100644 --- a/contrib/xz/src/liblzma/api/lzma/block.h +++ b/contrib/xz/src/liblzma/api/lzma/block.h @@ -1,581 +1,578 @@ /** * \file lzma/block.h * \brief .xz Block handling */ /* * Author: Lasse Collin * * This file has been put into the public domain. * You can do whatever you want with this file. * * See ../lzma.h for information about liblzma as a whole. */ #ifndef LZMA_H_INTERNAL # error Never include this file directly. Use instead. #endif /** * \brief Options for the Block and Block Header encoders and decoders * * Different Block handling functions use different parts of this structure. * Some read some members, other functions write, and some do both. Only the * members listed for reading need to be initialized when the specified * functions are called. The members marked for writing will be assigned * new values at some point either by calling the given function or by * later calls to lzma_code(). */ typedef struct { /** * \brief Block format version * * To prevent API and ABI breakages when new features are needed, * a version number is used to indicate which fields in this * structure are in use: * - liblzma >= 5.0.0: version = 0 is supported. * - liblzma >= 5.1.4beta: Support for version = 1 was added, * which adds the ignore_check field. * * If version is greater than one, most Block related functions * will return LZMA_OPTIONS_ERROR (lzma_block_header_decode() works * with any version value). * * Read by: * - All functions that take pointer to lzma_block as argument, * including lzma_block_header_decode(). * * Written by: * - lzma_block_header_decode() */ uint32_t version; /** * \brief Size of the Block Header field * * This is always a multiple of four. * * Read by: * - lzma_block_header_encode() * - lzma_block_header_decode() * - lzma_block_compressed_size() * - lzma_block_unpadded_size() * - lzma_block_total_size() * - lzma_block_decoder() * - lzma_block_buffer_decode() * * Written by: * - lzma_block_header_size() * - lzma_block_buffer_encode() */ uint32_t header_size; # define LZMA_BLOCK_HEADER_SIZE_MIN 8 # define LZMA_BLOCK_HEADER_SIZE_MAX 1024 /** * \brief Type of integrity Check * * The Check ID is not stored into the Block Header, thus its value * must be provided also when decoding. * * Read by: * - lzma_block_header_encode() * - lzma_block_header_decode() * - lzma_block_compressed_size() * - lzma_block_unpadded_size() * - lzma_block_total_size() * - lzma_block_encoder() * - lzma_block_decoder() * - lzma_block_buffer_encode() * - lzma_block_buffer_decode() */ lzma_check check; /** * \brief Size of the Compressed Data in bytes * * Encoding: If this is not LZMA_VLI_UNKNOWN, Block Header encoder * will store this value to the Block Header. Block encoder doesn't * care about this value, but will set it once the encoding has been * finished. * * Decoding: If this is not LZMA_VLI_UNKNOWN, Block decoder will * verify that the size of the Compressed Data field matches * compressed_size. * * Usually you don't know this value when encoding in streamed mode, * and thus cannot write this field into the Block Header. * * In non-streamed mode you can reserve space for this field before * encoding the actual Block. After encoding the data, finish the * Block by encoding the Block Header. Steps in detail: * * - Set compressed_size to some big enough value. If you don't know * better, use LZMA_VLI_MAX, but remember that bigger values take * more space in Block Header. * * - Call lzma_block_header_size() to see how much space you need to * reserve for the Block Header. * * - Encode the Block using lzma_block_encoder() and lzma_code(). * It sets compressed_size to the correct value. * * - Use lzma_block_header_encode() to encode the Block Header. * Because space was reserved in the first step, you don't need * to call lzma_block_header_size() anymore, because due to * reserving, header_size has to be big enough. If it is "too big", * lzma_block_header_encode() will add enough Header Padding to * make Block Header to match the size specified by header_size. * * Read by: * - lzma_block_header_size() * - lzma_block_header_encode() * - lzma_block_compressed_size() * - lzma_block_unpadded_size() * - lzma_block_total_size() * - lzma_block_decoder() * - lzma_block_buffer_decode() * * Written by: * - lzma_block_header_decode() * - lzma_block_compressed_size() * - lzma_block_encoder() * - lzma_block_decoder() * - lzma_block_buffer_encode() * - lzma_block_buffer_decode() */ lzma_vli compressed_size; /** * \brief Uncompressed Size in bytes * * This is handled very similarly to compressed_size above. * * uncompressed_size is needed by fewer functions than * compressed_size. This is because uncompressed_size isn't * needed to validate that Block stays within proper limits. * * Read by: * - lzma_block_header_size() * - lzma_block_header_encode() * - lzma_block_decoder() * - lzma_block_buffer_decode() * * Written by: * - lzma_block_header_decode() * - lzma_block_encoder() * - lzma_block_decoder() * - lzma_block_buffer_encode() * - lzma_block_buffer_decode() */ lzma_vli uncompressed_size; /** * \brief Array of filters * * There can be 1-4 filters. The end of the array is marked with * .id = LZMA_VLI_UNKNOWN. * * Read by: * - lzma_block_header_size() * - lzma_block_header_encode() * - lzma_block_encoder() * - lzma_block_decoder() * - lzma_block_buffer_encode() * - lzma_block_buffer_decode() * * Written by: * - lzma_block_header_decode(): Note that this does NOT free() * the old filter options structures. All unused filters[] will * have .id == LZMA_VLI_UNKNOWN and .options == NULL. If * decoding fails, all filters[] are guaranteed to be * LZMA_VLI_UNKNOWN and NULL. * * \note Because of the array is terminated with * .id = LZMA_VLI_UNKNOWN, the actual array must * have LZMA_FILTERS_MAX + 1 members or the Block * Header decoder will overflow the buffer. */ lzma_filter *filters; /** * \brief Raw value stored in the Check field * * After successful coding, the first lzma_check_size(check) bytes * of this array contain the raw value stored in the Check field. * * Note that CRC32 and CRC64 are stored in little endian byte order. * Take it into account if you display the Check values to the user. * * Written by: * - lzma_block_encoder() * - lzma_block_decoder() * - lzma_block_buffer_encode() * - lzma_block_buffer_decode() */ uint8_t raw_check[LZMA_CHECK_SIZE_MAX]; /* * Reserved space to allow possible future extensions without * breaking the ABI. You should not touch these, because the names * of these variables may change. These are and will never be used * with the currently supported options, so it is safe to leave these * uninitialized. */ void *reserved_ptr1; void *reserved_ptr2; void *reserved_ptr3; uint32_t reserved_int1; uint32_t reserved_int2; lzma_vli reserved_int3; lzma_vli reserved_int4; lzma_vli reserved_int5; lzma_vli reserved_int6; lzma_vli reserved_int7; lzma_vli reserved_int8; lzma_reserved_enum reserved_enum1; lzma_reserved_enum reserved_enum2; lzma_reserved_enum reserved_enum3; lzma_reserved_enum reserved_enum4; /** * \brief A flag to Block decoder to not verify the Check field * * This field is supported by liblzma >= 5.1.4beta if .version >= 1. * * If this is set to true, the integrity check won't be calculated * and verified. Unless you know what you are doing, you should * leave this to false. (A reason to set this to true is when the * file integrity is verified externally anyway and you want to * speed up the decompression, which matters mostly when using * SHA-256 as the integrity check.) * * If .version >= 1, read by: * - lzma_block_decoder() * - lzma_block_buffer_decode() * * Written by (.version is ignored): * - lzma_block_header_decode() always sets this to false */ lzma_bool ignore_check; lzma_bool reserved_bool2; lzma_bool reserved_bool3; lzma_bool reserved_bool4; lzma_bool reserved_bool5; lzma_bool reserved_bool6; lzma_bool reserved_bool7; lzma_bool reserved_bool8; } lzma_block; /** * \brief Decode the Block Header Size field * * To decode Block Header using lzma_block_header_decode(), the size of the * Block Header has to be known and stored into lzma_block.header_size. * The size can be calculated from the first byte of a Block using this macro. * Note that if the first byte is 0x00, it indicates beginning of Index; use * this macro only when the byte is not 0x00. * * There is no encoding macro, because Block Header encoder is enough for that. */ #define lzma_block_header_size_decode(b) (((uint32_t)(b) + 1) * 4) /** * \brief Calculate Block Header Size * * Calculate the minimum size needed for the Block Header field using the * settings specified in the lzma_block structure. Note that it is OK to * increase the calculated header_size value as long as it is a multiple of * four and doesn't exceed LZMA_BLOCK_HEADER_SIZE_MAX. Increasing header_size * just means that lzma_block_header_encode() will add Header Padding. * * \return - LZMA_OK: Size calculated successfully and stored to * block->header_size. * - LZMA_OPTIONS_ERROR: Unsupported version, filters or * filter options. * - LZMA_PROG_ERROR: Invalid values like compressed_size == 0. * * \note This doesn't check that all the options are valid i.e. this * may return LZMA_OK even if lzma_block_header_encode() or * lzma_block_encoder() would fail. If you want to validate the * filter chain, consider using lzma_memlimit_encoder() which as * a side-effect validates the filter chain. */ extern LZMA_API(lzma_ret) lzma_block_header_size(lzma_block *block) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Encode Block Header * * The caller must have calculated the size of the Block Header already with * lzma_block_header_size(). If a value larger than the one calculated by * lzma_block_header_size() is used, the Block Header will be padded to the * specified size. * * \param out Beginning of the output buffer. This must be * at least block->header_size bytes. * \param block Block options to be encoded. * * \return - LZMA_OK: Encoding was successful. block->header_size * bytes were written to output buffer. * - LZMA_OPTIONS_ERROR: Invalid or unsupported options. * - LZMA_PROG_ERROR: Invalid arguments, for example * block->header_size is invalid or block->filters is NULL. */ extern LZMA_API(lzma_ret) lzma_block_header_encode( const lzma_block *block, uint8_t *out) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Decode Block Header * * block->version should (usually) be set to the highest value supported * by the application. If the application sets block->version to a value * higher than supported by the current liblzma version, this function will * downgrade block->version to the highest value supported by it. Thus one * should check the value of block->version after calling this function if * block->version was set to a non-zero value and the application doesn't * otherwise know that the liblzma version being used is new enough to * support the specified block->version. * * The size of the Block Header must have already been decoded with * lzma_block_header_size_decode() macro and stored to block->header_size. * * The integrity check type from Stream Header must have been stored * to block->check. * * block->filters must have been allocated, but they don't need to be * initialized (possible existing filter options are not freed). * * \param block Destination for Block options. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() (and also free() * if an error occurs). * \param in Beginning of the input buffer. This must be * at least block->header_size bytes. * * \return - LZMA_OK: Decoding was successful. block->header_size * bytes were read from the input buffer. * - LZMA_OPTIONS_ERROR: The Block Header specifies some * unsupported options such as unsupported filters. This can * happen also if block->version was set to a too low value * compared to what would be required to properly represent * the information stored in the Block Header. * - LZMA_DATA_ERROR: Block Header is corrupt, for example, * the CRC32 doesn't match. * - LZMA_PROG_ERROR: Invalid arguments, for example * block->header_size is invalid or block->filters is NULL. */ extern LZMA_API(lzma_ret) lzma_block_header_decode(lzma_block *block, const lzma_allocator *allocator, const uint8_t *in) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Validate and set Compressed Size according to Unpadded Size * * Block Header stores Compressed Size, but Index has Unpadded Size. If the * application has already parsed the Index and is now decoding Blocks, * it can calculate Compressed Size from Unpadded Size. This function does * exactly that with error checking: * * - Compressed Size calculated from Unpadded Size must be positive integer, * that is, Unpadded Size must be big enough that after Block Header and * Check fields there's still at least one byte for Compressed Size. * * - If Compressed Size was present in Block Header, the new value * calculated from Unpadded Size is compared against the value * from Block Header. * * \note This function must be called _after_ decoding the Block Header * field so that it can properly validate Compressed Size if it * was present in Block Header. * * \return - LZMA_OK: block->compressed_size was set successfully. * - LZMA_DATA_ERROR: unpadded_size is too small compared to * block->header_size and lzma_check_size(block->check). * - LZMA_PROG_ERROR: Some values are invalid. For example, * block->header_size must be a multiple of four and * between 8 and 1024 inclusive. */ extern LZMA_API(lzma_ret) lzma_block_compressed_size( lzma_block *block, lzma_vli unpadded_size) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Calculate Unpadded Size * * The Index field stores Unpadded Size and Uncompressed Size. The latter * can be taken directly from the lzma_block structure after coding a Block, * but Unpadded Size needs to be calculated from Block Header Size, * Compressed Size, and size of the Check field. This is where this function * is needed. * * \return Unpadded Size on success, or zero on error. */ extern LZMA_API(lzma_vli) lzma_block_unpadded_size(const lzma_block *block) lzma_nothrow lzma_attr_pure; /** * \brief Calculate the total encoded size of a Block * * This is equivalent to lzma_block_unpadded_size() except that the returned * value includes the size of the Block Padding field. * * \return On success, total encoded size of the Block. On error, * zero is returned. */ extern LZMA_API(lzma_vli) lzma_block_total_size(const lzma_block *block) lzma_nothrow lzma_attr_pure; /** * \brief Initialize .xz Block encoder * * Valid actions for lzma_code() are LZMA_RUN, LZMA_SYNC_FLUSH (only if the * filter chain supports it), and LZMA_FINISH. * * \return - LZMA_OK: All good, continue with lzma_code(). * - LZMA_MEM_ERROR * - LZMA_OPTIONS_ERROR * - LZMA_UNSUPPORTED_CHECK: block->check specifies a Check ID * that is not supported by this build of liblzma. Initializing * the encoder failed. * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_block_encoder( lzma_stream *strm, lzma_block *block) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Initialize .xz Block decoder * * Valid actions for lzma_code() are LZMA_RUN and LZMA_FINISH. Using * LZMA_FINISH is not required. It is supported only for convenience. * * \return - LZMA_OK: All good, continue with lzma_code(). - * - LZMA_UNSUPPORTED_CHECK: Initialization was successful, but - * the given Check ID is not supported, thus Check will be - * ignored. * - LZMA_PROG_ERROR * - LZMA_MEM_ERROR */ extern LZMA_API(lzma_ret) lzma_block_decoder( lzma_stream *strm, lzma_block *block) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Calculate maximum output size for single-call Block encoding * * This is equivalent to lzma_stream_buffer_bound() but for .xz Blocks. * See the documentation of lzma_stream_buffer_bound(). */ extern LZMA_API(size_t) lzma_block_buffer_bound(size_t uncompressed_size) lzma_nothrow; /** * \brief Single-call .xz Block encoder * * In contrast to the multi-call encoder initialized with * lzma_block_encoder(), this function encodes also the Block Header. This * is required to make it possible to write appropriate Block Header also * in case the data isn't compressible, and different filter chain has to be * used to encode the data in uncompressed form using uncompressed chunks * of the LZMA2 filter. * * When the data isn't compressible, header_size, compressed_size, and * uncompressed_size are set just like when the data was compressible, but * it is possible that header_size is too small to hold the filter chain * specified in block->filters, because that isn't necessarily the filter * chain that was actually used to encode the data. lzma_block_unpadded_size() * still works normally, because it doesn't read the filters array. * * \param block Block options: block->version, block->check, * and block->filters must have been initialized. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * \param in Beginning of the input buffer * \param in_size Size of the input buffer * \param out Beginning of the output buffer * \param out_pos The next byte will be written to out[*out_pos]. * *out_pos is updated only if encoding succeeds. * \param out_size Size of the out buffer; the first byte into * which no data is written to is out[out_size]. * * \return - LZMA_OK: Encoding was successful. * - LZMA_BUF_ERROR: Not enough output buffer space. * - LZMA_UNSUPPORTED_CHECK * - LZMA_OPTIONS_ERROR * - LZMA_MEM_ERROR * - LZMA_DATA_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_block_buffer_encode( lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Single-call uncompressed .xz Block encoder * * This is like lzma_block_buffer_encode() except this doesn't try to * compress the data and instead encodes the data using LZMA2 uncompressed * chunks. The required output buffer size can be determined with * lzma_block_buffer_bound(). * * Since the data won't be compressed, this function ignores block->filters. * This function doesn't take lzma_allocator because this function doesn't * allocate any memory from the heap. */ extern LZMA_API(lzma_ret) lzma_block_uncomp_encode(lzma_block *block, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Single-call .xz Block decoder * * This is single-call equivalent of lzma_block_decoder(), and requires that * the caller has already decoded Block Header and checked its memory usage. * * \param block Block options just like with lzma_block_decoder(). * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * \param in Beginning of the input buffer * \param in_pos The next byte will be read from in[*in_pos]. * *in_pos is updated only if decoding succeeds. * \param in_size Size of the input buffer; the first byte that * won't be read is in[in_size]. * \param out Beginning of the output buffer * \param out_pos The next byte will be written to out[*out_pos]. * *out_pos is updated only if encoding succeeds. * \param out_size Size of the out buffer; the first byte into * which no data is written to is out[out_size]. * * \return - LZMA_OK: Decoding was successful. * - LZMA_OPTIONS_ERROR * - LZMA_DATA_ERROR * - LZMA_MEM_ERROR * - LZMA_BUF_ERROR: Output buffer was too small. * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_block_buffer_decode( lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; diff --git a/contrib/xz/src/liblzma/api/lzma/container.h b/contrib/xz/src/liblzma/api/lzma/container.h index 9fbf4df06178..9a9ff1c9a486 100644 --- a/contrib/xz/src/liblzma/api/lzma/container.h +++ b/contrib/xz/src/liblzma/api/lzma/container.h @@ -1,632 +1,644 @@ /** * \file lzma/container.h * \brief File formats */ /* * Author: Lasse Collin * * This file has been put into the public domain. * You can do whatever you want with this file. * * See ../lzma.h for information about liblzma as a whole. */ #ifndef LZMA_H_INTERNAL # error Never include this file directly. Use instead. #endif /************ * Encoding * ************/ /** * \brief Default compression preset * * It's not straightforward to recommend a default preset, because in some * cases keeping the resource usage relatively low is more important that * getting the maximum compression ratio. */ #define LZMA_PRESET_DEFAULT UINT32_C(6) /** * \brief Mask for preset level * * This is useful only if you need to extract the level from the preset * variable. That should be rare. */ #define LZMA_PRESET_LEVEL_MASK UINT32_C(0x1F) /* * Preset flags * * Currently only one flag is defined. */ /** * \brief Extreme compression preset * * This flag modifies the preset to make the encoding significantly slower * while improving the compression ratio only marginally. This is useful * when you don't mind wasting time to get as small result as possible. * * This flag doesn't affect the memory usage requirements of the decoder (at * least not significantly). The memory usage of the encoder may be increased * a little but only at the lowest preset levels (0-3). */ #define LZMA_PRESET_EXTREME (UINT32_C(1) << 31) /** * \brief Multithreading options */ typedef struct { /** * \brief Flags * * Set this to zero if no flags are wanted. * * No flags are currently supported. */ uint32_t flags; /** * \brief Number of worker threads to use */ uint32_t threads; /** * \brief Maximum uncompressed size of a Block * * The encoder will start a new .xz Block every block_size bytes. * Using LZMA_FULL_FLUSH or LZMA_FULL_BARRIER with lzma_code() * the caller may tell liblzma to start a new Block earlier. * * With LZMA2, a recommended block size is 2-4 times the LZMA2 * dictionary size. With very small dictionaries, it is recommended * to use at least 1 MiB block size for good compression ratio, even * if this is more than four times the dictionary size. Note that * these are only recommendations for typical use cases; feel free * to use other values. Just keep in mind that using a block size * less than the LZMA2 dictionary size is waste of RAM. * * Set this to 0 to let liblzma choose the block size depending * on the compression options. For LZMA2 it will be 3*dict_size * or 1 MiB, whichever is more. * * For each thread, about 3 * block_size bytes of memory will be * allocated. This may change in later liblzma versions. If so, * the memory usage will probably be reduced, not increased. */ uint64_t block_size; /** * \brief Timeout to allow lzma_code() to return early * * Multithreading can make liblzma to consume input and produce * output in a very bursty way: it may first read a lot of input * to fill internal buffers, then no input or output occurs for * a while. * * In single-threaded mode, lzma_code() won't return until it has * either consumed all the input or filled the output buffer. If * this is done in multithreaded mode, it may cause a call * lzma_code() to take even tens of seconds, which isn't acceptable * in all applications. * * To avoid very long blocking times in lzma_code(), a timeout * (in milliseconds) may be set here. If lzma_code() would block * longer than this number of milliseconds, it will return with * LZMA_OK. Reasonable values are 100 ms or more. The xz command * line tool uses 300 ms. * * If long blocking times are fine for you, set timeout to a special * value of 0, which will disable the timeout mechanism and will make * lzma_code() block until all the input is consumed or the output * buffer has been filled. * * \note Even with a timeout, lzma_code() might sometimes take * somewhat long time to return. No timing guarantees * are made. */ uint32_t timeout; /** * \brief Compression preset (level and possible flags) * * The preset is set just like with lzma_easy_encoder(). * The preset is ignored if filters below is non-NULL. */ uint32_t preset; /** * \brief Filter chain (alternative to a preset) * * If this is NULL, the preset above is used. Otherwise the preset * is ignored and the filter chain specified here is used. */ const lzma_filter *filters; /** * \brief Integrity check type * * See check.h for available checks. The xz command line tool * defaults to LZMA_CHECK_CRC64, which is a good choice if you * are unsure. */ lzma_check check; /* * Reserved space to allow possible future extensions without * breaking the ABI. You should not touch these, because the names * of these variables may change. These are and will never be used * with the currently supported options, so it is safe to leave these * uninitialized. */ lzma_reserved_enum reserved_enum1; lzma_reserved_enum reserved_enum2; lzma_reserved_enum reserved_enum3; uint32_t reserved_int1; uint32_t reserved_int2; uint32_t reserved_int3; uint32_t reserved_int4; uint64_t reserved_int5; uint64_t reserved_int6; uint64_t reserved_int7; uint64_t reserved_int8; void *reserved_ptr1; void *reserved_ptr2; void *reserved_ptr3; void *reserved_ptr4; } lzma_mt; /** * \brief Calculate approximate memory usage of easy encoder * * This function is a wrapper for lzma_raw_encoder_memusage(). * * \param preset Compression preset (level and possible flags) * * \return Number of bytes of memory required for the given * preset when encoding. If an error occurs, for example * due to unsupported preset, UINT64_MAX is returned. */ extern LZMA_API(uint64_t) lzma_easy_encoder_memusage(uint32_t preset) lzma_nothrow lzma_attr_pure; /** * \brief Calculate approximate decoder memory usage of a preset * * This function is a wrapper for lzma_raw_decoder_memusage(). * * \param preset Compression preset (level and possible flags) * * \return Number of bytes of memory required to decompress a file * that was compressed using the given preset. If an error * occurs, for example due to unsupported preset, UINT64_MAX * is returned. */ extern LZMA_API(uint64_t) lzma_easy_decoder_memusage(uint32_t preset) lzma_nothrow lzma_attr_pure; /** * \brief Initialize .xz Stream encoder using a preset number * * This function is intended for those who just want to use the basic features * if liblzma (that is, most developers out there). * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. * \param preset Compression preset to use. A preset consist of level * number and zero or more flags. Usually flags aren't * used, so preset is simply a number [0, 9] which match * the options -0 ... -9 of the xz command line tool. * Additional flags can be be set using bitwise-or with * the preset level number, e.g. 6 | LZMA_PRESET_EXTREME. * \param check Integrity check type to use. See check.h for available * checks. The xz command line tool defaults to * LZMA_CHECK_CRC64, which is a good choice if you are * unsure. LZMA_CHECK_CRC32 is good too as long as the * uncompressed file is not many gigabytes. * * \return - LZMA_OK: Initialization succeeded. Use lzma_code() to * encode your data. * - LZMA_MEM_ERROR: Memory allocation failed. * - LZMA_OPTIONS_ERROR: The given compression preset is not * supported by this build of liblzma. * - LZMA_UNSUPPORTED_CHECK: The given check type is not * supported by this liblzma build. * - LZMA_PROG_ERROR: One or more of the parameters have values * that will never be valid. For example, strm == NULL. * * If initialization fails (return value is not LZMA_OK), all the memory * allocated for *strm by liblzma is always freed. Thus, there is no need * to call lzma_end() after failed initialization. * * If initialization succeeds, use lzma_code() to do the actual encoding. * Valid values for `action' (the second argument of lzma_code()) are * LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, and LZMA_FINISH. In future, * there may be compression levels or flags that don't support LZMA_SYNC_FLUSH. */ extern LZMA_API(lzma_ret) lzma_easy_encoder( lzma_stream *strm, uint32_t preset, lzma_check check) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Single-call .xz Stream encoding using a preset number * * The maximum required output buffer size can be calculated with * lzma_stream_buffer_bound(). * * \param preset Compression preset to use. See the description * in lzma_easy_encoder(). * \param check Type of the integrity check to calculate from * uncompressed data. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * \param in Beginning of the input buffer * \param in_size Size of the input buffer * \param out Beginning of the output buffer * \param out_pos The next byte will be written to out[*out_pos]. * *out_pos is updated only if encoding succeeds. * \param out_size Size of the out buffer; the first byte into * which no data is written to is out[out_size]. * * \return - LZMA_OK: Encoding was successful. * - LZMA_BUF_ERROR: Not enough output buffer space. * - LZMA_UNSUPPORTED_CHECK * - LZMA_OPTIONS_ERROR * - LZMA_MEM_ERROR * - LZMA_DATA_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_easy_buffer_encode( uint32_t preset, lzma_check check, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; /** * \brief Initialize .xz Stream encoder using a custom filter chain * * \param strm Pointer to properly prepared lzma_stream * \param filters Array of filters. This must be terminated with * filters[n].id = LZMA_VLI_UNKNOWN. See filter.h for * more information. * \param check Type of the integrity check to calculate from * uncompressed data. * * \return - LZMA_OK: Initialization was successful. * - LZMA_MEM_ERROR * - LZMA_UNSUPPORTED_CHECK * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_stream_encoder(lzma_stream *strm, const lzma_filter *filters, lzma_check check) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Calculate approximate memory usage of multithreaded .xz encoder * * Since doing the encoding in threaded mode doesn't affect the memory * requirements of single-threaded decompressor, you can use * lzma_easy_decoder_memusage(options->preset) or * lzma_raw_decoder_memusage(options->filters) to calculate * the decompressor memory requirements. * * \param options Compression options * * \return Number of bytes of memory required for encoding with the * given options. If an error occurs, for example due to * unsupported preset or filter chain, UINT64_MAX is returned. */ extern LZMA_API(uint64_t) lzma_stream_encoder_mt_memusage( const lzma_mt *options) lzma_nothrow lzma_attr_pure; /** * \brief Initialize multithreaded .xz Stream encoder * * This provides the functionality of lzma_easy_encoder() and * lzma_stream_encoder() as a single function for multithreaded use. * * The supported actions for lzma_code() are LZMA_RUN, LZMA_FULL_FLUSH, * LZMA_FULL_BARRIER, and LZMA_FINISH. Support for LZMA_SYNC_FLUSH might be * added in the future. * * \param strm Pointer to properly prepared lzma_stream * \param options Pointer to multithreaded compression options * * \return - LZMA_OK * - LZMA_MEM_ERROR * - LZMA_UNSUPPORTED_CHECK * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_stream_encoder_mt( lzma_stream *strm, const lzma_mt *options) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Initialize .lzma encoder (legacy file format) * * The .lzma format is sometimes called the LZMA_Alone format, which is the * reason for the name of this function. The .lzma format supports only the * LZMA1 filter. There is no support for integrity checks like CRC32. * * Use this function if and only if you need to create files readable by * legacy LZMA tools such as LZMA Utils 4.32.x. Moving to the .xz format * is strongly recommended. * * The valid action values for lzma_code() are LZMA_RUN and LZMA_FINISH. * No kind of flushing is supported, because the file format doesn't make * it possible. * * \return - LZMA_OK * - LZMA_MEM_ERROR * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_alone_encoder( lzma_stream *strm, const lzma_options_lzma *options) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Calculate output buffer size for single-call Stream encoder * * When trying to compress uncompressible data, the encoded size will be * slightly bigger than the input data. This function calculates how much * output buffer space is required to be sure that lzma_stream_buffer_encode() * doesn't return LZMA_BUF_ERROR. * * The calculated value is not exact, but it is guaranteed to be big enough. * The actual maximum output space required may be slightly smaller (up to * about 100 bytes). This should not be a problem in practice. * * If the calculated maximum size doesn't fit into size_t or would make the * Stream grow past LZMA_VLI_MAX (which should never happen in practice), * zero is returned to indicate the error. * * \note The limit calculated by this function applies only to * single-call encoding. Multi-call encoding may (and probably * will) have larger maximum expansion when encoding * uncompressible data. Currently there is no function to * calculate the maximum expansion of multi-call encoding. */ extern LZMA_API(size_t) lzma_stream_buffer_bound(size_t uncompressed_size) lzma_nothrow; /** * \brief Single-call .xz Stream encoder * * \param filters Array of filters. This must be terminated with * filters[n].id = LZMA_VLI_UNKNOWN. See filter.h * for more information. * \param check Type of the integrity check to calculate from * uncompressed data. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * \param in Beginning of the input buffer * \param in_size Size of the input buffer * \param out Beginning of the output buffer * \param out_pos The next byte will be written to out[*out_pos]. * *out_pos is updated only if encoding succeeds. * \param out_size Size of the out buffer; the first byte into * which no data is written to is out[out_size]. * * \return - LZMA_OK: Encoding was successful. * - LZMA_BUF_ERROR: Not enough output buffer space. * - LZMA_UNSUPPORTED_CHECK * - LZMA_OPTIONS_ERROR * - LZMA_MEM_ERROR * - LZMA_DATA_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_stream_buffer_encode( lzma_filter *filters, lzma_check check, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow lzma_attr_warn_unused_result; /************ * Decoding * ************/ /** * This flag makes lzma_code() return LZMA_NO_CHECK if the input stream * being decoded has no integrity check. Note that when used with * lzma_auto_decoder(), all .lzma files will trigger LZMA_NO_CHECK * if LZMA_TELL_NO_CHECK is used. */ #define LZMA_TELL_NO_CHECK UINT32_C(0x01) /** * This flag makes lzma_code() return LZMA_UNSUPPORTED_CHECK if the input * stream has an integrity check, but the type of the integrity check is not * supported by this liblzma version or build. Such files can still be * decoded, but the integrity check cannot be verified. */ #define LZMA_TELL_UNSUPPORTED_CHECK UINT32_C(0x02) /** * This flag makes lzma_code() return LZMA_GET_CHECK as soon as the type * of the integrity check is known. The type can then be got with * lzma_get_check(). */ #define LZMA_TELL_ANY_CHECK UINT32_C(0x04) /** * This flag makes lzma_code() not calculate and verify the integrity check * of the compressed data in .xz files. This means that invalid integrity * check values won't be detected and LZMA_DATA_ERROR won't be returned in * such cases. * * This flag only affects the checks of the compressed data itself; the CRC32 * values in the .xz headers will still be verified normally. * * Don't use this flag unless you know what you are doing. Possible reasons * to use this flag: * * - Trying to recover data from a corrupt .xz file. * * - Speeding up decompression, which matters mostly with SHA-256 * or with files that have compressed extremely well. It's recommended * to not use this flag for this purpose unless the file integrity is * verified externally in some other way. * * Support for this flag was added in liblzma 5.1.4beta. */ #define LZMA_IGNORE_CHECK UINT32_C(0x10) /** * This flag enables decoding of concatenated files with file formats that * allow concatenating compressed files as is. From the formats currently * supported by liblzma, only the .xz format allows concatenated files. * Concatenated files are not allowed with the legacy .lzma format. * * This flag also affects the usage of the `action' argument for lzma_code(). * When LZMA_CONCATENATED is used, lzma_code() won't return LZMA_STREAM_END * unless LZMA_FINISH is used as `action'. Thus, the application has to set * LZMA_FINISH in the same way as it does when encoding. * * If LZMA_CONCATENATED is not used, the decoders still accept LZMA_FINISH * as `action' for lzma_code(), but the usage of LZMA_FINISH isn't required. */ #define LZMA_CONCATENATED UINT32_C(0x08) /** * \brief Initialize .xz Stream decoder * * \param strm Pointer to properly prepared lzma_stream * \param memlimit Memory usage limit as bytes. Use UINT64_MAX * to effectively disable the limiter. liblzma * 5.2.3 and earlier don't allow 0 here and return * LZMA_PROG_ERROR; later versions treat 0 as if 1 * had been specified. * \param flags Bitwise-or of zero or more of the decoder flags: * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, - * LZMA_TELL_ANY_CHECK, LZMA_CONCATENATED + * LZMA_TELL_ANY_CHECK, LZMA_IGNORE_CHECK, + * LZMA_CONCATENATED * * \return - LZMA_OK: Initialization was successful. * - LZMA_MEM_ERROR: Cannot allocate memory. * - LZMA_OPTIONS_ERROR: Unsupported flags * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_stream_decoder( lzma_stream *strm, uint64_t memlimit, uint32_t flags) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Decode .xz Streams and .lzma files with autodetection * * This decoder autodetects between the .xz and .lzma file formats, and * calls lzma_stream_decoder() or lzma_alone_decoder() once the type * of the input file has been detected. * + * If the flag LZMA_CONCATENATED is used and the input is a .lzma file: + * For historical reasons concatenated .lzma files aren't supported. + * If there is trailing data after one .lzma stream, lzma_code() will + * return LZMA_DATA_ERROR. (lzma_alone_decoder() doesn't have such a check + * as it doesn't support any decoder flags. It will return LZMA_STREAM_END + * after one .lzma stream.) + * * \param strm Pointer to properly prepared lzma_stream * \param memlimit Memory usage limit as bytes. Use UINT64_MAX * to effectively disable the limiter. liblzma * 5.2.3 and earlier don't allow 0 here and return * LZMA_PROG_ERROR; later versions treat 0 as if 1 * had been specified. - * \param flags Bitwise-or of flags, or zero for no flags. + * \param flags Bitwise-or of zero or more of the decoder flags: + * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, + * LZMA_TELL_ANY_CHECK, LZMA_IGNORE_CHECK, + * LZMA_CONCATENATED * * \return - LZMA_OK: Initialization was successful. * - LZMA_MEM_ERROR: Cannot allocate memory. * - LZMA_OPTIONS_ERROR: Unsupported flags * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_auto_decoder( lzma_stream *strm, uint64_t memlimit, uint32_t flags) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Initialize .lzma decoder (legacy file format) * * \param strm Pointer to properly prepared lzma_stream * \param memlimit Memory usage limit as bytes. Use UINT64_MAX * to effectively disable the limiter. liblzma * 5.2.3 and earlier don't allow 0 here and return * LZMA_PROG_ERROR; later versions treat 0 as if 1 * had been specified. * * Valid `action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. * There is no need to use LZMA_FINISH, but it's allowed because it may * simplify certain types of applications. * * \return - LZMA_OK * - LZMA_MEM_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_alone_decoder( lzma_stream *strm, uint64_t memlimit) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Single-call .xz Stream decoder * * \param memlimit Pointer to how much memory the decoder is allowed * to allocate. The value pointed by this pointer is * modified if and only if LZMA_MEMLIMIT_ERROR is * returned. * \param flags Bitwise-or of zero or more of the decoder flags: * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, - * LZMA_CONCATENATED. Note that LZMA_TELL_ANY_CHECK - * is not allowed and will return LZMA_PROG_ERROR. + * LZMA_IGNORE_CHECK, LZMA_CONCATENATED. Note that + * LZMA_TELL_ANY_CHECK is not allowed and will + * return LZMA_PROG_ERROR. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * \param in Beginning of the input buffer * \param in_pos The next byte will be read from in[*in_pos]. * *in_pos is updated only if decoding succeeds. * \param in_size Size of the input buffer; the first byte that * won't be read is in[in_size]. * \param out Beginning of the output buffer * \param out_pos The next byte will be written to out[*out_pos]. * *out_pos is updated only if decoding succeeds. * \param out_size Size of the out buffer; the first byte into * which no data is written to is out[out_size]. * * \return - LZMA_OK: Decoding was successful. * - LZMA_FORMAT_ERROR * - LZMA_OPTIONS_ERROR * - LZMA_DATA_ERROR * - LZMA_NO_CHECK: This can be returned only if using * the LZMA_TELL_NO_CHECK flag. * - LZMA_UNSUPPORTED_CHECK: This can be returned only if using * the LZMA_TELL_UNSUPPORTED_CHECK flag. * - LZMA_MEM_ERROR * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. * The minimum required memlimit value was stored to *memlimit. * - LZMA_BUF_ERROR: Output buffer was too small. * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_stream_buffer_decode( uint64_t *memlimit, uint32_t flags, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow lzma_attr_warn_unused_result; diff --git a/contrib/xz/src/liblzma/api/lzma/filter.h b/contrib/xz/src/liblzma/api/lzma/filter.h index 8c8593147658..04825c655eab 100644 --- a/contrib/xz/src/liblzma/api/lzma/filter.h +++ b/contrib/xz/src/liblzma/api/lzma/filter.h @@ -1,426 +1,428 @@ /** * \file lzma/filter.h * \brief Common filter related types and functions */ /* * Author: Lasse Collin * * This file has been put into the public domain. * You can do whatever you want with this file. * * See ../lzma.h for information about liblzma as a whole. */ #ifndef LZMA_H_INTERNAL # error Never include this file directly. Use instead. #endif /** * \brief Maximum number of filters in a chain * * A filter chain can have 1-4 filters, of which three are allowed to change * the size of the data. Usually only one or two filters are needed. */ #define LZMA_FILTERS_MAX 4 /** * \brief Filter options * * This structure is used to pass Filter ID and a pointer filter's * options to liblzma. A few functions work with a single lzma_filter * structure, while most functions expect a filter chain. * * A filter chain is indicated with an array of lzma_filter structures. * The array is terminated with .id = LZMA_VLI_UNKNOWN. Thus, the filter * array must have LZMA_FILTERS_MAX + 1 elements (that is, five) to * be able to hold any arbitrary filter chain. This is important when * using lzma_block_header_decode() from block.h, because too small * array would make liblzma write past the end of the filters array. */ typedef struct { /** * \brief Filter ID * * Use constants whose name begin with `LZMA_FILTER_' to specify * different filters. In an array of lzma_filter structures, use * LZMA_VLI_UNKNOWN to indicate end of filters. * * \note This is not an enum, because on some systems enums * cannot be 64-bit. */ lzma_vli id; /** * \brief Pointer to filter-specific options structure * * If the filter doesn't need options, set this to NULL. If id is * set to LZMA_VLI_UNKNOWN, options is ignored, and thus * doesn't need be initialized. */ void *options; } lzma_filter; /** * \brief Test if the given Filter ID is supported for encoding * * Return true if the give Filter ID is supported for encoding by this * liblzma build. Otherwise false is returned. * * There is no way to list which filters are available in this particular * liblzma version and build. It would be useless, because the application * couldn't know what kind of options the filter would need. */ extern LZMA_API(lzma_bool) lzma_filter_encoder_is_supported(lzma_vli id) lzma_nothrow lzma_attr_const; /** * \brief Test if the given Filter ID is supported for decoding * * Return true if the give Filter ID is supported for decoding by this * liblzma build. Otherwise false is returned. */ extern LZMA_API(lzma_bool) lzma_filter_decoder_is_supported(lzma_vli id) lzma_nothrow lzma_attr_const; /** * \brief Copy the filters array * * Copy the Filter IDs and filter-specific options from src to dest. * Up to LZMA_FILTERS_MAX filters are copied, plus the terminating * .id == LZMA_VLI_UNKNOWN. Thus, dest should have at least * LZMA_FILTERS_MAX + 1 elements space unless the caller knows that * src is smaller than that. * * Unless the filter-specific options is NULL, the Filter ID has to be * supported by liblzma, because liblzma needs to know the size of every * filter-specific options structure. The filter-specific options are not * validated. If options is NULL, any unsupported Filter IDs are copied * without returning an error. * * Old filter-specific options in dest are not freed, so dest doesn't * need to be initialized by the caller in any way. * * If an error occurs, memory possibly already allocated by this function - * is always freed. + * is always freed. liblzma versions older than 5.2.7 may modify the dest + * array and leave its contents in an undefined state if an error occurs. + * liblzma 5.2.7 and newer only modify the dest array when returning LZMA_OK. * * \return - LZMA_OK * - LZMA_MEM_ERROR * - LZMA_OPTIONS_ERROR: Unsupported Filter ID and its options * is not NULL. * - LZMA_PROG_ERROR: src or dest is NULL. */ extern LZMA_API(lzma_ret) lzma_filters_copy( const lzma_filter *src, lzma_filter *dest, const lzma_allocator *allocator) lzma_nothrow; /** * \brief Calculate approximate memory requirements for raw encoder * * This function can be used to calculate the memory requirements for * Block and Stream encoders too because Block and Stream encoders don't * need significantly more memory than raw encoder. * * \param filters Array of filters terminated with * .id == LZMA_VLI_UNKNOWN. * * \return Number of bytes of memory required for the given * filter chain when encoding. If an error occurs, * for example due to unsupported filter chain, * UINT64_MAX is returned. */ extern LZMA_API(uint64_t) lzma_raw_encoder_memusage(const lzma_filter *filters) lzma_nothrow lzma_attr_pure; /** * \brief Calculate approximate memory requirements for raw decoder * * This function can be used to calculate the memory requirements for * Block and Stream decoders too because Block and Stream decoders don't * need significantly more memory than raw decoder. * * \param filters Array of filters terminated with * .id == LZMA_VLI_UNKNOWN. * * \return Number of bytes of memory required for the given * filter chain when decoding. If an error occurs, * for example due to unsupported filter chain, * UINT64_MAX is returned. */ extern LZMA_API(uint64_t) lzma_raw_decoder_memusage(const lzma_filter *filters) lzma_nothrow lzma_attr_pure; /** * \brief Initialize raw encoder * * This function may be useful when implementing custom file formats. * * \param strm Pointer to properly prepared lzma_stream * \param filters Array of lzma_filter structures. The end of the * array must be marked with .id = LZMA_VLI_UNKNOWN. * * The `action' with lzma_code() can be LZMA_RUN, LZMA_SYNC_FLUSH (if the * filter chain supports it), or LZMA_FINISH. * * \return - LZMA_OK * - LZMA_MEM_ERROR * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_raw_encoder( lzma_stream *strm, const lzma_filter *filters) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Initialize raw decoder * * The initialization of raw decoder goes similarly to raw encoder. * * The `action' with lzma_code() can be LZMA_RUN or LZMA_FINISH. Using * LZMA_FINISH is not required, it is supported just for convenience. * * \return - LZMA_OK * - LZMA_MEM_ERROR * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_raw_decoder( lzma_stream *strm, const lzma_filter *filters) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Update the filter chain in the encoder * * This function is for advanced users only. This function has two slightly * different purposes: * * - After LZMA_FULL_FLUSH when using Stream encoder: Set a new filter * chain, which will be used starting from the next Block. * * - After LZMA_SYNC_FLUSH using Raw, Block, or Stream encoder: Change * the filter-specific options in the middle of encoding. The actual * filters in the chain (Filter IDs) cannot be changed. In the future, * it might become possible to change the filter options without * using LZMA_SYNC_FLUSH. * * While rarely useful, this function may be called also when no data has * been compressed yet. In that case, this function will behave as if * LZMA_FULL_FLUSH (Stream encoder) or LZMA_SYNC_FLUSH (Raw or Block * encoder) had been used right before calling this function. * * \return - LZMA_OK * - LZMA_MEM_ERROR * - LZMA_MEMLIMIT_ERROR * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_filters_update( lzma_stream *strm, const lzma_filter *filters) lzma_nothrow; /** * \brief Single-call raw encoder * * \param filters Array of lzma_filter structures. The end of the * array must be marked with .id = LZMA_VLI_UNKNOWN. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * \param in Beginning of the input buffer * \param in_size Size of the input buffer * \param out Beginning of the output buffer * \param out_pos The next byte will be written to out[*out_pos]. * *out_pos is updated only if encoding succeeds. * \param out_size Size of the out buffer; the first byte into * which no data is written to is out[out_size]. * * \return - LZMA_OK: Encoding was successful. * - LZMA_BUF_ERROR: Not enough output buffer space. * - LZMA_OPTIONS_ERROR * - LZMA_MEM_ERROR * - LZMA_DATA_ERROR * - LZMA_PROG_ERROR * * \note There is no function to calculate how big output buffer * would surely be big enough. (lzma_stream_buffer_bound() * works only for lzma_stream_buffer_encode(); raw encoder * won't necessarily meet that bound.) */ extern LZMA_API(lzma_ret) lzma_raw_buffer_encode( const lzma_filter *filters, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; /** * \brief Single-call raw decoder * * \param filters Array of lzma_filter structures. The end of the * array must be marked with .id = LZMA_VLI_UNKNOWN. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * \param in Beginning of the input buffer * \param in_pos The next byte will be read from in[*in_pos]. * *in_pos is updated only if decoding succeeds. * \param in_size Size of the input buffer; the first byte that * won't be read is in[in_size]. * \param out Beginning of the output buffer * \param out_pos The next byte will be written to out[*out_pos]. * *out_pos is updated only if encoding succeeds. * \param out_size Size of the out buffer; the first byte into * which no data is written to is out[out_size]. */ extern LZMA_API(lzma_ret) lzma_raw_buffer_decode( const lzma_filter *filters, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; /** * \brief Get the size of the Filter Properties field * * This function may be useful when implementing custom file formats * using the raw encoder and decoder. * * \param size Pointer to uint32_t to hold the size of the properties * \param filter Filter ID and options (the size of the properties may * vary depending on the options) * * \return - LZMA_OK * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR * * \note This function validates the Filter ID, but does not * necessarily validate the options. Thus, it is possible * that this returns LZMA_OK while the following call to * lzma_properties_encode() returns LZMA_OPTIONS_ERROR. */ extern LZMA_API(lzma_ret) lzma_properties_size( uint32_t *size, const lzma_filter *filter) lzma_nothrow; /** * \brief Encode the Filter Properties field * * \param filter Filter ID and options * \param props Buffer to hold the encoded options. The size of * buffer must have been already determined with * lzma_properties_size(). * * \return - LZMA_OK * - LZMA_OPTIONS_ERROR * - LZMA_PROG_ERROR * * \note Even this function won't validate more options than actually * necessary. Thus, it is possible that encoding the properties * succeeds but using the same options to initialize the encoder * will fail. * * \note If lzma_properties_size() indicated that the size * of the Filter Properties field is zero, calling * lzma_properties_encode() is not required, but it * won't do any harm either. */ extern LZMA_API(lzma_ret) lzma_properties_encode( const lzma_filter *filter, uint8_t *props) lzma_nothrow; /** * \brief Decode the Filter Properties field * * \param filter filter->id must have been set to the correct * Filter ID. filter->options doesn't need to be * initialized (it's not freed by this function). The * decoded options will be stored in filter->options; * it's application's responsibility to free it when * appropriate. filter->options is set to NULL if * there are no properties or if an error occurs. * \param allocator Custom memory allocator used to allocate the * options. Set to NULL to use the default malloc(), * and in case of an error, also free(). * \param props Input buffer containing the properties. * \param props_size Size of the properties. This must be the exact * size; giving too much or too little input will * return LZMA_OPTIONS_ERROR. * * \return - LZMA_OK * - LZMA_OPTIONS_ERROR * - LZMA_MEM_ERROR */ extern LZMA_API(lzma_ret) lzma_properties_decode( lzma_filter *filter, const lzma_allocator *allocator, const uint8_t *props, size_t props_size) lzma_nothrow; /** * \brief Calculate encoded size of a Filter Flags field * * Knowing the size of Filter Flags is useful to know when allocating * memory to hold the encoded Filter Flags. * * \param size Pointer to integer to hold the calculated size * \param filter Filter ID and associated options whose encoded * size is to be calculated * * \return - LZMA_OK: *size set successfully. Note that this doesn't * guarantee that filter->options is valid, thus * lzma_filter_flags_encode() may still fail. * - LZMA_OPTIONS_ERROR: Unknown Filter ID or unsupported options. * - LZMA_PROG_ERROR: Invalid options * * \note If you need to calculate size of List of Filter Flags, * you need to loop over every lzma_filter entry. */ extern LZMA_API(lzma_ret) lzma_filter_flags_size( uint32_t *size, const lzma_filter *filter) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Encode Filter Flags into given buffer * * In contrast to some functions, this doesn't allocate the needed buffer. * This is due to how this function is used internally by liblzma. * * \param filter Filter ID and options to be encoded * \param out Beginning of the output buffer * \param out_pos out[*out_pos] is the next write position. This * is updated by the encoder. * \param out_size out[out_size] is the first byte to not write. * * \return - LZMA_OK: Encoding was successful. * - LZMA_OPTIONS_ERROR: Invalid or unsupported options. * - LZMA_PROG_ERROR: Invalid options or not enough output * buffer space (you should have checked it with * lzma_filter_flags_size()). */ extern LZMA_API(lzma_ret) lzma_filter_flags_encode(const lzma_filter *filter, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow lzma_attr_warn_unused_result; /** * \brief Decode Filter Flags from given buffer * * The decoded result is stored into *filter. The old value of * filter->options is not free()d. * * \return - LZMA_OK * - LZMA_OPTIONS_ERROR * - LZMA_MEM_ERROR * - LZMA_PROG_ERROR */ extern LZMA_API(lzma_ret) lzma_filter_flags_decode( lzma_filter *filter, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size) lzma_nothrow lzma_attr_warn_unused_result; diff --git a/contrib/xz/src/liblzma/api/lzma/version.h b/contrib/xz/src/liblzma/api/lzma/version.h index 2bf3eaed24fa..fcae6ea2abc2 100644 --- a/contrib/xz/src/liblzma/api/lzma/version.h +++ b/contrib/xz/src/liblzma/api/lzma/version.h @@ -1,121 +1,121 @@ /** * \file lzma/version.h * \brief Version number */ /* * Author: Lasse Collin * * This file has been put into the public domain. * You can do whatever you want with this file. * * See ../lzma.h for information about liblzma as a whole. */ #ifndef LZMA_H_INTERNAL # error Never include this file directly. Use instead. #endif /* * Version number split into components */ #define LZMA_VERSION_MAJOR 5 #define LZMA_VERSION_MINOR 2 -#define LZMA_VERSION_PATCH 5 +#define LZMA_VERSION_PATCH 8 #define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_STABLE #ifndef LZMA_VERSION_COMMIT # define LZMA_VERSION_COMMIT "" #endif /* * Map symbolic stability levels to integers. */ #define LZMA_VERSION_STABILITY_ALPHA 0 #define LZMA_VERSION_STABILITY_BETA 1 #define LZMA_VERSION_STABILITY_STABLE 2 /** * \brief Compile-time version number * * The version number is of format xyyyzzzs where * - x = major * - yyy = minor * - zzz = revision * - s indicates stability: 0 = alpha, 1 = beta, 2 = stable * * The same xyyyzzz triplet is never reused with different stability levels. * For example, if 5.1.0alpha has been released, there will never be 5.1.0beta * or 5.1.0 stable. * * \note The version number of liblzma has nothing to with * the version number of Igor Pavlov's LZMA SDK. */ #define LZMA_VERSION (LZMA_VERSION_MAJOR * UINT32_C(10000000) \ + LZMA_VERSION_MINOR * UINT32_C(10000) \ + LZMA_VERSION_PATCH * UINT32_C(10) \ + LZMA_VERSION_STABILITY) /* * Macros to construct the compile-time version string */ #if LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_ALPHA # define LZMA_VERSION_STABILITY_STRING "alpha" #elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_BETA # define LZMA_VERSION_STABILITY_STRING "beta" #elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_STABLE # define LZMA_VERSION_STABILITY_STRING "" #else # error Incorrect LZMA_VERSION_STABILITY #endif #define LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) \ #major "." #minor "." #patch stability commit #define LZMA_VERSION_STRING_C(major, minor, patch, stability, commit) \ LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) /** * \brief Compile-time version as a string * * This can be for example "4.999.5alpha", "4.999.8beta", or "5.0.0" (stable * versions don't have any "stable" suffix). In future, a snapshot built * from source code repository may include an additional suffix, for example * "4.999.8beta-21-g1d92". The commit ID won't be available in numeric form * in LZMA_VERSION macro. */ #define LZMA_VERSION_STRING LZMA_VERSION_STRING_C( \ LZMA_VERSION_MAJOR, LZMA_VERSION_MINOR, \ LZMA_VERSION_PATCH, LZMA_VERSION_STABILITY_STRING, \ LZMA_VERSION_COMMIT) /* #ifndef is needed for use with windres (MinGW or Cygwin). */ #ifndef LZMA_H_INTERNAL_RC /** * \brief Run-time version number as an integer * * Return the value of LZMA_VERSION macro at the compile time of liblzma. * This allows the application to compare if it was built against the same, * older, or newer version of liblzma that is currently running. */ extern LZMA_API(uint32_t) lzma_version_number(void) lzma_nothrow lzma_attr_const; /** * \brief Run-time version as a string * * This function may be useful if you want to display which version of * liblzma your application is currently using. */ extern LZMA_API(const char *) lzma_version_string(void) lzma_nothrow lzma_attr_const; #endif diff --git a/contrib/xz/src/liblzma/api/lzma/vli.h b/contrib/xz/src/liblzma/api/lzma/vli.h index 1b7a952a406d..7a0a803dfc9c 100644 --- a/contrib/xz/src/liblzma/api/lzma/vli.h +++ b/contrib/xz/src/liblzma/api/lzma/vli.h @@ -1,166 +1,168 @@ /** * \file lzma/vli.h * \brief Variable-length integer handling * * In the .xz format, most integers are encoded in a variable-length * representation, which is sometimes called little endian base-128 encoding. * This saves space when smaller values are more likely than bigger values. * * The encoding scheme encodes seven bits to every byte, using minimum * number of bytes required to represent the given value. Encodings that use * non-minimum number of bytes are invalid, thus every integer has exactly * one encoded representation. The maximum number of bits in a VLI is 63, * thus the vli argument must be less than or equal to UINT64_MAX / 2. You * should use LZMA_VLI_MAX for clarity. */ /* * Author: Lasse Collin * * This file has been put into the public domain. * You can do whatever you want with this file. * * See ../lzma.h for information about liblzma as a whole. */ #ifndef LZMA_H_INTERNAL # error Never include this file directly. Use instead. #endif /** * \brief Maximum supported value of a variable-length integer */ #define LZMA_VLI_MAX (UINT64_MAX / 2) /** * \brief VLI value to denote that the value is unknown */ #define LZMA_VLI_UNKNOWN UINT64_MAX /** * \brief Maximum supported encoded length of variable length integers */ #define LZMA_VLI_BYTES_MAX 9 /** * \brief VLI constant suffix */ #define LZMA_VLI_C(n) UINT64_C(n) /** * \brief Variable-length integer type * * Valid VLI values are in the range [0, LZMA_VLI_MAX]. Unknown value is * indicated with LZMA_VLI_UNKNOWN, which is the maximum value of the * underlying integer type. * * lzma_vli will be uint64_t for the foreseeable future. If a bigger size * is needed in the future, it is guaranteed that 2 * LZMA_VLI_MAX will * not overflow lzma_vli. This simplifies integer overflow detection. */ typedef uint64_t lzma_vli; /** * \brief Validate a variable-length integer * * This is useful to test that application has given acceptable values * for example in the uncompressed_size and compressed_size variables. * * \return True if the integer is representable as VLI or if it * indicates unknown value. */ #define lzma_vli_is_valid(vli) \ ((vli) <= LZMA_VLI_MAX || (vli) == LZMA_VLI_UNKNOWN) /** * \brief Encode a variable-length integer * * This function has two modes: single-call and multi-call. Single-call mode * encodes the whole integer at once; it is an error if the output buffer is * too small. Multi-call mode saves the position in *vli_pos, and thus it is * possible to continue encoding if the buffer becomes full before the whole * integer has been encoded. * * \param vli Integer to be encoded * \param vli_pos How many VLI-encoded bytes have already been written * out. When starting to encode a new integer in * multi-call mode, *vli_pos must be set to zero. * To use single-call encoding, set vli_pos to NULL. * \param out Beginning of the output buffer * \param out_pos The next byte will be written to out[*out_pos]. * \param out_size Size of the out buffer; the first byte into * which no data is written to is out[out_size]. * * \return Slightly different return values are used in multi-call and * single-call modes. * * Single-call (vli_pos == NULL): * - LZMA_OK: Integer successfully encoded. * - LZMA_PROG_ERROR: Arguments are not sane. This can be due * to too little output space; single-call mode doesn't use * LZMA_BUF_ERROR, since the application should have checked * the encoded size with lzma_vli_size(). * * Multi-call (vli_pos != NULL): * - LZMA_OK: So far all OK, but the integer is not * completely written out yet. * - LZMA_STREAM_END: Integer successfully encoded. * - LZMA_BUF_ERROR: No output space was provided. * - LZMA_PROG_ERROR: Arguments are not sane. */ extern LZMA_API(lzma_ret) lzma_vli_encode(lzma_vli vli, size_t *vli_pos, uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; /** * \brief Decode a variable-length integer * * Like lzma_vli_encode(), this function has single-call and multi-call modes. * * \param vli Pointer to decoded integer. The decoder will * initialize it to zero when *vli_pos == 0, so * application isn't required to initialize *vli. * \param vli_pos How many bytes have already been decoded. When * starting to decode a new integer in multi-call * mode, *vli_pos must be initialized to zero. To * use single-call decoding, set vli_pos to NULL. * \param in Beginning of the input buffer * \param in_pos The next byte will be read from in[*in_pos]. * \param in_size Size of the input buffer; the first byte that * won't be read is in[in_size]. * * \return Slightly different return values are used in multi-call and * single-call modes. * * Single-call (vli_pos == NULL): * - LZMA_OK: Integer successfully decoded. * - LZMA_DATA_ERROR: Integer is corrupt. This includes hitting * the end of the input buffer before the whole integer was * decoded; providing no input at all will use LZMA_DATA_ERROR. * - LZMA_PROG_ERROR: Arguments are not sane. * * Multi-call (vli_pos != NULL): * - LZMA_OK: So far all OK, but the integer is not * completely decoded yet. * - LZMA_STREAM_END: Integer successfully decoded. * - LZMA_DATA_ERROR: Integer is corrupt. * - LZMA_BUF_ERROR: No input was provided. * - LZMA_PROG_ERROR: Arguments are not sane. */ extern LZMA_API(lzma_ret) lzma_vli_decode(lzma_vli *vli, size_t *vli_pos, const uint8_t *in, size_t *in_pos, size_t in_size) lzma_nothrow; /** * \brief Get the number of bytes required to encode a VLI * + * \param vli Integer whose encoded size is to be determined + * * \return Number of bytes on success (1-9). If vli isn't valid, * zero is returned. */ extern LZMA_API(uint32_t) lzma_vli_size(lzma_vli vli) lzma_nothrow lzma_attr_pure; diff --git a/contrib/xz/src/liblzma/check/crc32_x86.S b/contrib/xz/src/liblzma/check/crc32_x86.S index 1ef7f7e37ae9..4f395df8122a 100644 --- a/contrib/xz/src/liblzma/check/crc32_x86.S +++ b/contrib/xz/src/liblzma/check/crc32_x86.S @@ -1,304 +1,313 @@ /* * Speed-optimized CRC32 using slicing-by-eight algorithm * * This uses only i386 instructions, but it is optimized for i686 and later * (including e.g. Pentium II/III/IV, Athlon XP, and Core 2). For i586 * (e.g. Pentium), slicing-by-four would be better, and even the C version * of slicing-by-eight built with gcc -march=i586 tends to be a little bit * better than this. Very few probably run this code on i586 or older x86 * so this shouldn't be a problem in practice. * * Authors: Igor Pavlov (original version) * Lasse Collin (AT&T syntax, PIC support, better portability) * * This file has been put into the public domain. * You can do whatever you want with this file. * * This code needs lzma_crc32_table, which can be created using the * following C code: uint32_t lzma_crc32_table[8][256]; void init_table(void) { // IEEE-802.3 static const uint32_t poly32 = UINT32_C(0xEDB88320); // Castagnoli // static const uint32_t poly32 = UINT32_C(0x82F63B78); // Koopman // static const uint32_t poly32 = UINT32_C(0xEB31D82E); for (size_t s = 0; s < 8; ++s) { for (size_t b = 0; b < 256; ++b) { uint32_t r = s == 0 ? b : lzma_crc32_table[s - 1][b]; for (size_t i = 0; i < 8; ++i) { if (r & 1) r = (r >> 1) ^ poly32; else r >>= 1; } lzma_crc32_table[s][b] = r; } } } * The prototype of the CRC32 function: * extern uint32_t lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc); */ +/* When Intel CET is enabled, include in assembly code to mark + Intel CET support. */ +#ifdef __CET__ +# include +#else +# define _CET_ENDBR +#endif + /* * On some systems, the functions need to be prefixed. The prefix is * usually an underscore. */ #ifndef __USER_LABEL_PREFIX__ # define __USER_LABEL_PREFIX__ #endif #define MAKE_SYM_CAT(prefix, sym) prefix ## sym #define MAKE_SYM(prefix, sym) MAKE_SYM_CAT(prefix, sym) #define LZMA_CRC32 MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc32) #define LZMA_CRC32_TABLE MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc32_table) /* * Solaris assembler doesn't have .p2align, and Darwin uses .align * differently than GNU/Linux and Solaris. */ #if defined(__APPLE__) || defined(__MSDOS__) # define ALIGN(pow2, abs) .align pow2 #else # define ALIGN(pow2, abs) .align abs #endif .text .globl LZMA_CRC32 #if !defined(__APPLE__) && !defined(_WIN32) && !defined(__CYGWIN__) \ && !defined(__MSDOS__) .type LZMA_CRC32, @function #endif ALIGN(4, 16) LZMA_CRC32: + _CET_ENDBR /* * Register usage: * %eax crc * %esi buf * %edi size or buf + size * %ebx lzma_crc32_table * %ebp Table index * %ecx Temporary * %edx Temporary */ pushl %ebx pushl %esi pushl %edi pushl %ebp movl 0x14(%esp), %esi /* buf */ movl 0x18(%esp), %edi /* size */ movl 0x1C(%esp), %eax /* crc */ /* * Store the address of lzma_crc32_table to %ebx. This is needed to * get position-independent code (PIC). * * The PIC macro is defined by libtool, while __PIC__ is defined * by GCC but only on some systems. Testing for both makes it simpler * to test this code without libtool, and keeps the code working also * when built with libtool but using something else than GCC. * * I understood that libtool may define PIC on Windows even though * the code in Windows DLLs is not PIC in sense that it is in ELF * binaries, so we need a separate check to always use the non-PIC * code on Windows. */ #if (!defined(PIC) && !defined(__PIC__)) \ || (defined(_WIN32) || defined(__CYGWIN__)) /* Not PIC */ movl $ LZMA_CRC32_TABLE, %ebx #elif defined(__APPLE__) /* Mach-O */ call .L_get_pc .L_pic: leal .L_lzma_crc32_table$non_lazy_ptr-.L_pic(%ebx), %ebx movl (%ebx), %ebx #else /* ELF */ call .L_get_pc addl $_GLOBAL_OFFSET_TABLE_, %ebx movl LZMA_CRC32_TABLE@GOT(%ebx), %ebx #endif /* Complement the initial value. */ notl %eax ALIGN(4, 16) .L_align: /* * Check if there is enough input to use slicing-by-eight. * We need 16 bytes, because the loop pre-reads eight bytes. */ cmpl $16, %edi jb .L_rest /* Check if we have reached alignment of eight bytes. */ testl $7, %esi jz .L_slice /* Calculate CRC of the next input byte. */ movzbl (%esi), %ebp incl %esi movzbl %al, %ecx xorl %ecx, %ebp shrl $8, %eax xorl (%ebx, %ebp, 4), %eax decl %edi jmp .L_align ALIGN(2, 4) .L_slice: /* * If we get here, there's at least 16 bytes of aligned input * available. Make %edi multiple of eight bytes. Store the possible * remainder over the "size" variable in the argument stack. */ movl %edi, 0x18(%esp) andl $-8, %edi subl %edi, 0x18(%esp) /* * Let %edi be buf + size - 8 while running the main loop. This way * we can compare for equality to determine when exit the loop. */ addl %esi, %edi subl $8, %edi /* Read in the first eight aligned bytes. */ xorl (%esi), %eax movl 4(%esi), %ecx movzbl %cl, %ebp .L_loop: movl 0x0C00(%ebx, %ebp, 4), %edx movzbl %ch, %ebp xorl 0x0800(%ebx, %ebp, 4), %edx shrl $16, %ecx xorl 8(%esi), %edx movzbl %cl, %ebp xorl 0x0400(%ebx, %ebp, 4), %edx movzbl %ch, %ebp xorl (%ebx, %ebp, 4), %edx movzbl %al, %ebp /* * Read the next four bytes, for which the CRC is calculated - * on the next interation of the loop. + * on the next iteration of the loop. */ movl 12(%esi), %ecx xorl 0x1C00(%ebx, %ebp, 4), %edx movzbl %ah, %ebp shrl $16, %eax xorl 0x1800(%ebx, %ebp, 4), %edx movzbl %ah, %ebp movzbl %al, %eax movl 0x1400(%ebx, %eax, 4), %eax addl $8, %esi xorl %edx, %eax xorl 0x1000(%ebx, %ebp, 4), %eax /* Check for end of aligned input. */ cmpl %edi, %esi movzbl %cl, %ebp jne .L_loop /* * Process the remaining eight bytes, which we have already * copied to %ecx and %edx. */ movl 0x0C00(%ebx, %ebp, 4), %edx movzbl %ch, %ebp xorl 0x0800(%ebx, %ebp, 4), %edx shrl $16, %ecx movzbl %cl, %ebp xorl 0x0400(%ebx, %ebp, 4), %edx movzbl %ch, %ebp xorl (%ebx, %ebp, 4), %edx movzbl %al, %ebp xorl 0x1C00(%ebx, %ebp, 4), %edx movzbl %ah, %ebp shrl $16, %eax xorl 0x1800(%ebx, %ebp, 4), %edx movzbl %ah, %ebp movzbl %al, %eax movl 0x1400(%ebx, %eax, 4), %eax addl $8, %esi xorl %edx, %eax xorl 0x1000(%ebx, %ebp, 4), %eax /* Copy the number of remaining bytes to %edi. */ movl 0x18(%esp), %edi .L_rest: /* Check for end of input. */ testl %edi, %edi jz .L_return /* Calculate CRC of the next input byte. */ movzbl (%esi), %ebp incl %esi movzbl %al, %ecx xorl %ecx, %ebp shrl $8, %eax xorl (%ebx, %ebp, 4), %eax decl %edi jmp .L_rest .L_return: /* Complement the final value. */ notl %eax popl %ebp popl %edi popl %esi popl %ebx ret #if defined(PIC) || defined(__PIC__) ALIGN(4, 16) .L_get_pc: movl (%esp), %ebx ret #endif #if defined(__APPLE__) && (defined(PIC) || defined(__PIC__)) /* Mach-O PIC */ .section __IMPORT,__pointers,non_lazy_symbol_pointers .L_lzma_crc32_table$non_lazy_ptr: .indirect_symbol LZMA_CRC32_TABLE .long 0 #elif defined(_WIN32) || defined(__CYGWIN__) # ifdef DLL_EXPORT /* This is equivalent of __declspec(dllexport). */ .section .drectve .ascii " -export:lzma_crc32" # endif #elif !defined(__MSDOS__) /* ELF */ .size LZMA_CRC32, .-LZMA_CRC32 #endif /* * This is needed to support non-executable stack. It's ugly to - * use __linux__ here, but I don't know a way to detect when + * use __FreeBSD__ and __linux__ here, but I don't know a way to detect when * we are using GNU assembler. */ #if defined(__ELF__) && (defined(__FreeBSD__) || defined(__linux__)) .section .note.GNU-stack,"",@progbits #endif diff --git a/contrib/xz/src/liblzma/check/crc64_x86.S b/contrib/xz/src/liblzma/check/crc64_x86.S index 81e06261e0f2..9aecf5865455 100644 --- a/contrib/xz/src/liblzma/check/crc64_x86.S +++ b/contrib/xz/src/liblzma/check/crc64_x86.S @@ -1,287 +1,296 @@ /* * Speed-optimized CRC64 using slicing-by-four algorithm * * This uses only i386 instructions, but it is optimized for i686 and later * (including e.g. Pentium II/III/IV, Athlon XP, and Core 2). * * Authors: Igor Pavlov (original CRC32 assembly code) * Lasse Collin (CRC64 adaptation of the modified CRC32 code) * * This file has been put into the public domain. * You can do whatever you want with this file. * * This code needs lzma_crc64_table, which can be created using the * following C code: uint64_t lzma_crc64_table[4][256]; void init_table(void) { // ECMA-182 static const uint64_t poly64 = UINT64_C(0xC96C5795D7870F42); for (size_t s = 0; s < 4; ++s) { for (size_t b = 0; b < 256; ++b) { uint64_t r = s == 0 ? b : lzma_crc64_table[s - 1][b]; for (size_t i = 0; i < 8; ++i) { if (r & 1) r = (r >> 1) ^ poly64; else r >>= 1; } lzma_crc64_table[s][b] = r; } } } * The prototype of the CRC64 function: * extern uint64_t lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc); */ +/* When Intel CET is enabled, include in assembly code to mark + Intel CET support. */ +#ifdef __CET__ +# include +#else +# define _CET_ENDBR +#endif + /* * On some systems, the functions need to be prefixed. The prefix is * usually an underscore. */ #ifndef __USER_LABEL_PREFIX__ # define __USER_LABEL_PREFIX__ #endif #define MAKE_SYM_CAT(prefix, sym) prefix ## sym #define MAKE_SYM(prefix, sym) MAKE_SYM_CAT(prefix, sym) #define LZMA_CRC64 MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc64) #define LZMA_CRC64_TABLE MAKE_SYM(__USER_LABEL_PREFIX__, lzma_crc64_table) /* * Solaris assembler doesn't have .p2align, and Darwin uses .align * differently than GNU/Linux and Solaris. */ #if defined(__APPLE__) || defined(__MSDOS__) # define ALIGN(pow2, abs) .align pow2 #else # define ALIGN(pow2, abs) .align abs #endif .text .globl LZMA_CRC64 #if !defined(__APPLE__) && !defined(_WIN32) && !defined(__CYGWIN__) \ && !defined(__MSDOS__) .type LZMA_CRC64, @function #endif ALIGN(4, 16) LZMA_CRC64: + _CET_ENDBR /* * Register usage: * %eax crc LSB * %edx crc MSB * %esi buf * %edi size or buf + size * %ebx lzma_crc64_table * %ebp Table index * %ecx Temporary */ pushl %ebx pushl %esi pushl %edi pushl %ebp movl 0x14(%esp), %esi /* buf */ movl 0x18(%esp), %edi /* size */ movl 0x1C(%esp), %eax /* crc LSB */ movl 0x20(%esp), %edx /* crc MSB */ /* * Store the address of lzma_crc64_table to %ebx. This is needed to * get position-independent code (PIC). * * The PIC macro is defined by libtool, while __PIC__ is defined * by GCC but only on some systems. Testing for both makes it simpler * to test this code without libtool, and keeps the code working also * when built with libtool but using something else than GCC. * * I understood that libtool may define PIC on Windows even though * the code in Windows DLLs is not PIC in sense that it is in ELF * binaries, so we need a separate check to always use the non-PIC * code on Windows. */ #if (!defined(PIC) && !defined(__PIC__)) \ || (defined(_WIN32) || defined(__CYGWIN__)) /* Not PIC */ movl $ LZMA_CRC64_TABLE, %ebx #elif defined(__APPLE__) /* Mach-O */ call .L_get_pc .L_pic: leal .L_lzma_crc64_table$non_lazy_ptr-.L_pic(%ebx), %ebx movl (%ebx), %ebx #else /* ELF */ call .L_get_pc addl $_GLOBAL_OFFSET_TABLE_, %ebx movl LZMA_CRC64_TABLE@GOT(%ebx), %ebx #endif /* Complement the initial value. */ notl %eax notl %edx .L_align: /* * Check if there is enough input to use slicing-by-four. * We need eight bytes, because the loop pre-reads four bytes. */ cmpl $8, %edi jb .L_rest /* Check if we have reached alignment of four bytes. */ testl $3, %esi jz .L_slice /* Calculate CRC of the next input byte. */ movzbl (%esi), %ebp incl %esi movzbl %al, %ecx xorl %ecx, %ebp shrdl $8, %edx, %eax xorl (%ebx, %ebp, 8), %eax shrl $8, %edx xorl 4(%ebx, %ebp, 8), %edx decl %edi jmp .L_align .L_slice: /* * If we get here, there's at least eight bytes of aligned input * available. Make %edi multiple of four bytes. Store the possible * remainder over the "size" variable in the argument stack. */ movl %edi, 0x18(%esp) andl $-4, %edi subl %edi, 0x18(%esp) /* * Let %edi be buf + size - 4 while running the main loop. This way * we can compare for equality to determine when exit the loop. */ addl %esi, %edi subl $4, %edi /* Read in the first four aligned bytes. */ movl (%esi), %ecx .L_loop: xorl %eax, %ecx movzbl %cl, %ebp movl 0x1800(%ebx, %ebp, 8), %eax xorl %edx, %eax movl 0x1804(%ebx, %ebp, 8), %edx movzbl %ch, %ebp xorl 0x1000(%ebx, %ebp, 8), %eax xorl 0x1004(%ebx, %ebp, 8), %edx shrl $16, %ecx movzbl %cl, %ebp xorl 0x0800(%ebx, %ebp, 8), %eax xorl 0x0804(%ebx, %ebp, 8), %edx movzbl %ch, %ebp addl $4, %esi xorl (%ebx, %ebp, 8), %eax xorl 4(%ebx, %ebp, 8), %edx /* Check for end of aligned input. */ cmpl %edi, %esi /* * Copy the next input byte to %ecx. It is slightly faster to * read it here than at the top of the loop. */ movl (%esi), %ecx jb .L_loop /* * Process the remaining four bytes, which we have already * copied to %ecx. */ xorl %eax, %ecx movzbl %cl, %ebp movl 0x1800(%ebx, %ebp, 8), %eax xorl %edx, %eax movl 0x1804(%ebx, %ebp, 8), %edx movzbl %ch, %ebp xorl 0x1000(%ebx, %ebp, 8), %eax xorl 0x1004(%ebx, %ebp, 8), %edx shrl $16, %ecx movzbl %cl, %ebp xorl 0x0800(%ebx, %ebp, 8), %eax xorl 0x0804(%ebx, %ebp, 8), %edx movzbl %ch, %ebp addl $4, %esi xorl (%ebx, %ebp, 8), %eax xorl 4(%ebx, %ebp, 8), %edx /* Copy the number of remaining bytes to %edi. */ movl 0x18(%esp), %edi .L_rest: /* Check for end of input. */ testl %edi, %edi jz .L_return /* Calculate CRC of the next input byte. */ movzbl (%esi), %ebp incl %esi movzbl %al, %ecx xorl %ecx, %ebp shrdl $8, %edx, %eax xorl (%ebx, %ebp, 8), %eax shrl $8, %edx xorl 4(%ebx, %ebp, 8), %edx decl %edi jmp .L_rest .L_return: /* Complement the final value. */ notl %eax notl %edx popl %ebp popl %edi popl %esi popl %ebx ret #if defined(PIC) || defined(__PIC__) ALIGN(4, 16) .L_get_pc: movl (%esp), %ebx ret #endif #if defined(__APPLE__) && (defined(PIC) || defined(__PIC__)) /* Mach-O PIC */ .section __IMPORT,__pointers,non_lazy_symbol_pointers .L_lzma_crc64_table$non_lazy_ptr: .indirect_symbol LZMA_CRC64_TABLE .long 0 #elif defined(_WIN32) || defined(__CYGWIN__) # ifdef DLL_EXPORT /* This is equivalent of __declspec(dllexport). */ .section .drectve .ascii " -export:lzma_crc64" # endif #elif !defined(__MSDOS__) /* ELF */ .size LZMA_CRC64, .-LZMA_CRC64 #endif /* * This is needed to support non-executable stack. It's ugly to - * use __linux__ here, but I don't know a way to detect when + * use __FreeBSD__ and __linux__ here, but I don't know a way to detect when * we are using GNU assembler. */ #if defined(__ELF__) && (defined(__FreeBSD__) || defined(__linux__)) .section .note.GNU-stack,"",@progbits #endif diff --git a/contrib/xz/src/liblzma/common/alone_decoder.c b/contrib/xz/src/liblzma/common/alone_decoder.c index 239b230ef19b..a3ea20a27a9c 100644 --- a/contrib/xz/src/liblzma/common/alone_decoder.c +++ b/contrib/xz/src/liblzma/common/alone_decoder.c @@ -1,242 +1,242 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file alone_decoder.c /// \brief Decoder for LZMA_Alone files // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "alone_decoder.h" #include "lzma_decoder.h" #include "lz_decoder.h" typedef struct { lzma_next_coder next; enum { SEQ_PROPERTIES, SEQ_DICTIONARY_SIZE, SEQ_UNCOMPRESSED_SIZE, SEQ_CODER_INIT, SEQ_CODE, } sequence; /// If true, reject files that are unlikely to be .lzma files. /// If false, more non-.lzma files get accepted and will give /// LZMA_DATA_ERROR either immediately or after a few output bytes. bool picky; /// Position in the header fields size_t pos; /// Uncompressed size decoded from the header lzma_vli uncompressed_size; /// Memory usage limit uint64_t memlimit; /// Amount of memory actually needed (only an estimate) uint64_t memusage; /// Options decoded from the header needed to initialize /// the LZMA decoder lzma_options_lzma options; } lzma_alone_coder; static lzma_ret alone_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { lzma_alone_coder *coder = coder_ptr; while (*out_pos < out_size && (coder->sequence == SEQ_CODE || *in_pos < in_size)) switch (coder->sequence) { case SEQ_PROPERTIES: if (lzma_lzma_lclppb_decode(&coder->options, in[*in_pos])) return LZMA_FORMAT_ERROR; coder->sequence = SEQ_DICTIONARY_SIZE; ++*in_pos; break; case SEQ_DICTIONARY_SIZE: coder->options.dict_size |= (size_t)(in[*in_pos]) << (coder->pos * 8); if (++coder->pos == 4) { if (coder->picky && coder->options.dict_size != UINT32_MAX) { // A hack to ditch tons of false positives: // We allow only dictionary sizes that are // 2^n or 2^n + 2^(n-1). LZMA_Alone created // only files with 2^n, but accepts any // dictionary size. uint32_t d = coder->options.dict_size - 1; d |= d >> 2; d |= d >> 3; d |= d >> 4; d |= d >> 8; d |= d >> 16; ++d; if (d != coder->options.dict_size) return LZMA_FORMAT_ERROR; } coder->pos = 0; coder->sequence = SEQ_UNCOMPRESSED_SIZE; } ++*in_pos; break; case SEQ_UNCOMPRESSED_SIZE: coder->uncompressed_size |= (lzma_vli)(in[*in_pos]) << (coder->pos * 8); ++*in_pos; if (++coder->pos < 8) break; // Another hack to ditch false positives: Assume that // if the uncompressed size is known, it must be less // than 256 GiB. if (coder->picky && coder->uncompressed_size != LZMA_VLI_UNKNOWN && coder->uncompressed_size >= (LZMA_VLI_C(1) << 38)) return LZMA_FORMAT_ERROR; // Calculate the memory usage so that it is ready // for SEQ_CODER_INIT. coder->memusage = lzma_lzma_decoder_memusage(&coder->options) + LZMA_MEMUSAGE_BASE; coder->pos = 0; coder->sequence = SEQ_CODER_INIT; // Fall through case SEQ_CODER_INIT: { if (coder->memusage > coder->memlimit) return LZMA_MEMLIMIT_ERROR; lzma_filter_info filters[2] = { { .init = &lzma_lzma_decoder_init, .options = &coder->options, }, { .init = NULL, } }; const lzma_ret ret = lzma_next_filter_init(&coder->next, allocator, filters); if (ret != LZMA_OK) return ret; // Use a hack to set the uncompressed size. lzma_lz_decoder_uncompressed(coder->next.coder, - coder->uncompressed_size); + coder->uncompressed_size, true); coder->sequence = SEQ_CODE; break; } case SEQ_CODE: { return coder->next.code(coder->next.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); } default: return LZMA_PROG_ERROR; } return LZMA_OK; } static void alone_decoder_end(void *coder_ptr, const lzma_allocator *allocator) { lzma_alone_coder *coder = coder_ptr; lzma_next_end(&coder->next, allocator); lzma_free(coder, allocator); return; } static lzma_ret alone_decoder_memconfig(void *coder_ptr, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit) { lzma_alone_coder *coder = coder_ptr; *memusage = coder->memusage; *old_memlimit = coder->memlimit; if (new_memlimit != 0) { if (new_memlimit < coder->memusage) return LZMA_MEMLIMIT_ERROR; coder->memlimit = new_memlimit; } return LZMA_OK; } extern lzma_ret lzma_alone_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, uint64_t memlimit, bool picky) { lzma_next_coder_init(&lzma_alone_decoder_init, next, allocator); lzma_alone_coder *coder = next->coder; if (coder == NULL) { coder = lzma_alloc(sizeof(lzma_alone_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; next->coder = coder; next->code = &alone_decode; next->end = &alone_decoder_end; next->memconfig = &alone_decoder_memconfig; coder->next = LZMA_NEXT_CODER_INIT; } coder->sequence = SEQ_PROPERTIES; coder->picky = picky; coder->pos = 0; coder->options.dict_size = 0; coder->options.preset_dict = NULL; coder->options.preset_dict_size = 0; coder->uncompressed_size = 0; coder->memlimit = my_max(1, memlimit); coder->memusage = LZMA_MEMUSAGE_BASE; return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_alone_decoder(lzma_stream *strm, uint64_t memlimit) { lzma_next_strm_init(lzma_alone_decoder_init, strm, memlimit, false); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } diff --git a/contrib/xz/src/liblzma/common/auto_decoder.c b/contrib/xz/src/liblzma/common/auto_decoder.c index 6895c7ccf7b5..f58ab595ee3c 100644 --- a/contrib/xz/src/liblzma/common/auto_decoder.c +++ b/contrib/xz/src/liblzma/common/auto_decoder.c @@ -1,195 +1,195 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file auto_decoder.c /// \brief Autodetect between .xz Stream and .lzma (LZMA_Alone) formats // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "stream_decoder.h" #include "alone_decoder.h" typedef struct { /// Stream decoder or LZMA_Alone decoder lzma_next_coder next; uint64_t memlimit; uint32_t flags; enum { SEQ_INIT, SEQ_CODE, SEQ_FINISH, } sequence; } lzma_auto_coder; static lzma_ret auto_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { lzma_auto_coder *coder = coder_ptr; switch (coder->sequence) { case SEQ_INIT: if (*in_pos >= in_size) return LZMA_OK; // Update the sequence now, because we want to continue from // SEQ_CODE even if we return some LZMA_*_CHECK. coder->sequence = SEQ_CODE; // Detect the file format. For now this is simple, since if // it doesn't start with 0xFD (the first magic byte of the // new format), it has to be LZMA_Alone, or something that // we don't support at all. if (in[*in_pos] == 0xFD) { return_if_error(lzma_stream_decoder_init( &coder->next, allocator, coder->memlimit, coder->flags)); } else { return_if_error(lzma_alone_decoder_init(&coder->next, allocator, coder->memlimit, true)); // If the application wants to know about missing // integrity check or about the check in general, we // need to handle it here, because LZMA_Alone decoder // doesn't accept any flags. if (coder->flags & LZMA_TELL_NO_CHECK) return LZMA_NO_CHECK; if (coder->flags & LZMA_TELL_ANY_CHECK) return LZMA_GET_CHECK; } // Fall through case SEQ_CODE: { const lzma_ret ret = coder->next.code( coder->next.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); if (ret != LZMA_STREAM_END || (coder->flags & LZMA_CONCATENATED) == 0) return ret; coder->sequence = SEQ_FINISH; } // Fall through case SEQ_FINISH: - // When LZMA_DECODE_CONCATENATED was used and we were decoding - // LZMA_Alone file, we need to check check that there is no + // When LZMA_CONCATENATED was used and we were decoding + // a LZMA_Alone file, we need to check that there is no // trailing garbage and wait for LZMA_FINISH. if (*in_pos < in_size) return LZMA_DATA_ERROR; return action == LZMA_FINISH ? LZMA_STREAM_END : LZMA_OK; default: assert(0); return LZMA_PROG_ERROR; } } static void auto_decoder_end(void *coder_ptr, const lzma_allocator *allocator) { lzma_auto_coder *coder = coder_ptr; lzma_next_end(&coder->next, allocator); lzma_free(coder, allocator); return; } static lzma_check auto_decoder_get_check(const void *coder_ptr) { const lzma_auto_coder *coder = coder_ptr; // It is LZMA_Alone if get_check is NULL. return coder->next.get_check == NULL ? LZMA_CHECK_NONE : coder->next.get_check(coder->next.coder); } static lzma_ret auto_decoder_memconfig(void *coder_ptr, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit) { lzma_auto_coder *coder = coder_ptr; lzma_ret ret; if (coder->next.memconfig != NULL) { ret = coder->next.memconfig(coder->next.coder, memusage, old_memlimit, new_memlimit); assert(*old_memlimit == coder->memlimit); } else { // No coder is configured yet. Use the base value as // the current memory usage. *memusage = LZMA_MEMUSAGE_BASE; *old_memlimit = coder->memlimit; ret = LZMA_OK; if (new_memlimit != 0 && new_memlimit < *memusage) ret = LZMA_MEMLIMIT_ERROR; } if (ret == LZMA_OK && new_memlimit != 0) coder->memlimit = new_memlimit; return ret; } static lzma_ret auto_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, uint64_t memlimit, uint32_t flags) { lzma_next_coder_init(&auto_decoder_init, next, allocator); if (flags & ~LZMA_SUPPORTED_FLAGS) return LZMA_OPTIONS_ERROR; lzma_auto_coder *coder = next->coder; if (coder == NULL) { coder = lzma_alloc(sizeof(lzma_auto_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; next->coder = coder; next->code = &auto_decode; next->end = &auto_decoder_end; next->get_check = &auto_decoder_get_check; next->memconfig = &auto_decoder_memconfig; coder->next = LZMA_NEXT_CODER_INIT; } coder->memlimit = my_max(1, memlimit); coder->flags = flags; coder->sequence = SEQ_INIT; return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_auto_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags) { lzma_next_strm_init(auto_decoder_init, strm, memlimit, flags); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } diff --git a/contrib/xz/src/liblzma/common/block_buffer_encoder.c b/contrib/xz/src/liblzma/common/block_buffer_encoder.c index 39e263aa4765..a47342efd0f5 100644 --- a/contrib/xz/src/liblzma/common/block_buffer_encoder.c +++ b/contrib/xz/src/liblzma/common/block_buffer_encoder.c @@ -1,337 +1,355 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file block_buffer_encoder.c /// \brief Single-call .xz Block encoder // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "block_buffer_encoder.h" #include "block_encoder.h" #include "filter_encoder.h" #include "lzma2_encoder.h" #include "check.h" /// Estimate the maximum size of the Block Header and Check fields for /// a Block that uses LZMA2 uncompressed chunks. We could use /// lzma_block_header_size() but this is simpler. /// /// Block Header Size + Block Flags + Compressed Size /// + Uncompressed Size + Filter Flags for LZMA2 + CRC32 + Check /// and round up to the next multiple of four to take Header Padding /// into account. #define HEADERS_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 3 + 4 \ + LZMA_CHECK_SIZE_MAX + 3) & ~3) static uint64_t lzma2_bound(uint64_t uncompressed_size) { // Prevent integer overflow in overhead calculation. if (uncompressed_size > COMPRESSED_SIZE_MAX) return 0; // Calculate the exact overhead of the LZMA2 headers: Round // uncompressed_size up to the next multiple of LZMA2_CHUNK_MAX, // multiply by the size of per-chunk header, and add one byte for // the end marker. const uint64_t overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1) / LZMA2_CHUNK_MAX) * LZMA2_HEADER_UNCOMPRESSED + 1; // Catch the possible integer overflow. if (COMPRESSED_SIZE_MAX - overhead < uncompressed_size) return 0; return uncompressed_size + overhead; } extern uint64_t lzma_block_buffer_bound64(uint64_t uncompressed_size) { // If the data doesn't compress, we always use uncompressed // LZMA2 chunks. uint64_t lzma2_size = lzma2_bound(uncompressed_size); if (lzma2_size == 0) return 0; // Take Block Padding into account. lzma2_size = (lzma2_size + 3) & ~UINT64_C(3); // No risk of integer overflow because lzma2_bound() already takes // into account the size of the headers in the Block. return HEADERS_BOUND + lzma2_size; } extern LZMA_API(size_t) lzma_block_buffer_bound(size_t uncompressed_size) { uint64_t ret = lzma_block_buffer_bound64(uncompressed_size); #if SIZE_MAX < UINT64_MAX // Catch the possible integer overflow on 32-bit systems. if (ret > SIZE_MAX) return 0; #endif return ret; } static lzma_ret block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { // Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at // all, but LZMA2 always requires a dictionary, so use the minimum // value to minimize memory usage of the decoder. lzma_options_lzma lzma2 = { .dict_size = LZMA_DICT_SIZE_MIN, }; lzma_filter filters[2]; filters[0].id = LZMA_FILTER_LZMA2; filters[0].options = &lzma2; filters[1].id = LZMA_VLI_UNKNOWN; // Set the above filter options to *block temporarily so that we can // encode the Block Header. lzma_filter *filters_orig = block->filters; block->filters = filters; if (lzma_block_header_size(block) != LZMA_OK) { block->filters = filters_orig; return LZMA_PROG_ERROR; } // Check that there's enough output space. The caller has already // set block->compressed_size to what lzma2_bound() has returned, // so we can reuse that value. We know that compressed_size is a // known valid VLI and header_size is a small value so their sum // will never overflow. assert(block->compressed_size == lzma2_bound(in_size)); if (out_size - *out_pos < block->header_size + block->compressed_size) { block->filters = filters_orig; return LZMA_BUF_ERROR; } if (lzma_block_header_encode(block, out + *out_pos) != LZMA_OK) { block->filters = filters_orig; return LZMA_PROG_ERROR; } block->filters = filters_orig; *out_pos += block->header_size; // Encode the data using LZMA2 uncompressed chunks. size_t in_pos = 0; uint8_t control = 0x01; // Dictionary reset while (in_pos < in_size) { // Control byte: Indicate uncompressed chunk, of which // the first resets the dictionary. out[(*out_pos)++] = control; control = 0x02; // No dictionary reset // Size of the uncompressed chunk const size_t copy_size = my_min(in_size - in_pos, LZMA2_CHUNK_MAX); out[(*out_pos)++] = (copy_size - 1) >> 8; out[(*out_pos)++] = (copy_size - 1) & 0xFF; // The actual data assert(*out_pos + copy_size <= out_size); memcpy(out + *out_pos, in + in_pos, copy_size); in_pos += copy_size; *out_pos += copy_size; } // End marker out[(*out_pos)++] = 0x00; assert(*out_pos <= out_size); return LZMA_OK; } static lzma_ret block_encode_normal(lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { // Find out the size of the Block Header. return_if_error(lzma_block_header_size(block)); // Reserve space for the Block Header and skip it for now. if (out_size - *out_pos <= block->header_size) return LZMA_BUF_ERROR; const size_t out_start = *out_pos; *out_pos += block->header_size; // Limit out_size so that we stop encoding if the output would grow // bigger than what uncompressed Block would be. if (out_size - *out_pos > block->compressed_size) out_size = *out_pos + block->compressed_size; // TODO: In many common cases this could be optimized to use // significantly less memory. lzma_next_coder raw_encoder = LZMA_NEXT_CODER_INIT; lzma_ret ret = lzma_raw_encoder_init( &raw_encoder, allocator, block->filters); if (ret == LZMA_OK) { size_t in_pos = 0; ret = raw_encoder.code(raw_encoder.coder, allocator, in, &in_pos, in_size, out, out_pos, out_size, LZMA_FINISH); } // NOTE: This needs to be run even if lzma_raw_encoder_init() failed. lzma_next_end(&raw_encoder, allocator); if (ret == LZMA_STREAM_END) { // Compression was successful. Write the Block Header. block->compressed_size = *out_pos - (out_start + block->header_size); ret = lzma_block_header_encode(block, out + out_start); if (ret != LZMA_OK) ret = LZMA_PROG_ERROR; } else if (ret == LZMA_OK) { // Output buffer became full. ret = LZMA_BUF_ERROR; } // Reset *out_pos if something went wrong. if (ret != LZMA_OK) *out_pos = out_start; return ret; } static lzma_ret block_buffer_encode(lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size, bool try_to_compress) { // Validate the arguments. if (block == NULL || (in == NULL && in_size != 0) || out == NULL || out_pos == NULL || *out_pos > out_size) return LZMA_PROG_ERROR; // The contents of the structure may depend on the version so // check the version before validating the contents of *block. if (block->version > 1) return LZMA_OPTIONS_ERROR; if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX || (try_to_compress && block->filters == NULL)) return LZMA_PROG_ERROR; if (!lzma_check_is_supported(block->check)) return LZMA_UNSUPPORTED_CHECK; // Size of a Block has to be a multiple of four, so limit the size // here already. This way we don't need to check it again when adding // Block Padding. out_size -= (out_size - *out_pos) & 3; // Get the size of the Check field. const size_t check_size = lzma_check_size(block->check); assert(check_size != UINT32_MAX); // Reserve space for the Check field. if (out_size - *out_pos <= check_size) return LZMA_BUF_ERROR; out_size -= check_size; // Initialize block->uncompressed_size and calculate the worst-case // value for block->compressed_size. block->uncompressed_size = in_size; block->compressed_size = lzma2_bound(in_size); if (block->compressed_size == 0) return LZMA_DATA_ERROR; // Do the actual compression. lzma_ret ret = LZMA_BUF_ERROR; if (try_to_compress) ret = block_encode_normal(block, allocator, in, in_size, out, out_pos, out_size); if (ret != LZMA_OK) { // If the error was something else than output buffer // becoming full, return the error now. if (ret != LZMA_BUF_ERROR) return ret; // The data was uncompressible (at least with the options // given to us) or the output buffer was too small. Use the // uncompressed chunks of LZMA2 to wrap the data into a valid // Block. If we haven't been given enough output space, even // this may fail. return_if_error(block_encode_uncompressed(block, in, in_size, out, out_pos, out_size)); } assert(*out_pos <= out_size); // Block Padding. No buffer overflow here, because we already adjusted // out_size so that (out_size - out_start) is a multiple of four. // Thus, if the buffer is full, the loop body can never run. for (size_t i = (size_t)(block->compressed_size); i & 3; ++i) { assert(*out_pos < out_size); out[(*out_pos)++] = 0x00; } // If there's no Check field, we are done now. if (check_size > 0) { // Calculate the integrity check. We reserved space for // the Check field earlier so we don't need to check for // available output space here. lzma_check_state check; lzma_check_init(&check, block->check); lzma_check_update(&check, block->check, in, in_size); lzma_check_finish(&check, block->check); memcpy(block->raw_check, check.buffer.u8, check_size); memcpy(out + *out_pos, check.buffer.u8, check_size); *out_pos += check_size; } return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { return block_buffer_encode(block, allocator, in, in_size, out, out_pos, out_size, true); } +#ifdef HAVE_SYMBOL_VERSIONS_LINUX +// This is for compatibility with binaries linked against liblzma that +// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7. +LZMA_SYMVER_API("lzma_block_uncomp_encode@XZ_5.2.2", + lzma_ret, lzma_block_uncomp_encode_522)(lzma_block *block, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result + __attribute__((__alias__("lzma_block_uncomp_encode_52"))); + +LZMA_SYMVER_API("lzma_block_uncomp_encode@@XZ_5.2", + lzma_ret, lzma_block_uncomp_encode_52)(lzma_block *block, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + +#define lzma_block_uncomp_encode lzma_block_uncomp_encode_52 +#endif extern LZMA_API(lzma_ret) lzma_block_uncomp_encode(lzma_block *block, const uint8_t *in, size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size) { // It won't allocate any memory from heap so no need // for lzma_allocator. return block_buffer_encode(block, NULL, in, in_size, out, out_pos, out_size, false); } diff --git a/contrib/xz/src/liblzma/common/block_decoder.c b/contrib/xz/src/liblzma/common/block_decoder.c index 075bd279ff60..4827e0f0469d 100644 --- a/contrib/xz/src/liblzma/common/block_decoder.c +++ b/contrib/xz/src/liblzma/common/block_decoder.c @@ -1,257 +1,286 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file block_decoder.c /// \brief Decodes .xz Blocks // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "block_decoder.h" #include "filter_decoder.h" #include "check.h" typedef struct { enum { SEQ_CODE, SEQ_PADDING, SEQ_CHECK, } sequence; /// The filters in the chain; initialized with lzma_raw_decoder_init(). lzma_next_coder next; /// Decoding options; we also write Compressed Size and Uncompressed /// Size back to this structure when the decoding has been finished. lzma_block *block; /// Compressed Size calculated while decoding lzma_vli compressed_size; /// Uncompressed Size calculated while decoding lzma_vli uncompressed_size; /// Maximum allowed Compressed Size; this takes into account the /// size of the Block Header and Check fields when Compressed Size /// is unknown. lzma_vli compressed_limit; + /// Maximum allowed Uncompressed Size. + lzma_vli uncompressed_limit; + /// Position when reading the Check field size_t check_pos; /// Check of the uncompressed data lzma_check_state check; /// True if the integrity check won't be calculated and verified. bool ignore_check; } lzma_block_coder; -static inline bool -update_size(lzma_vli *size, lzma_vli add, lzma_vli limit) -{ - if (limit > LZMA_VLI_MAX) - limit = LZMA_VLI_MAX; - - if (limit < *size || limit - *size < add) - return true; - - *size += add; - - return false; -} - - static inline bool is_size_valid(lzma_vli size, lzma_vli reference) { return reference == LZMA_VLI_UNKNOWN || reference == size; } static lzma_ret block_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { lzma_block_coder *coder = coder_ptr; switch (coder->sequence) { case SEQ_CODE: { const size_t in_start = *in_pos; const size_t out_start = *out_pos; + // Limit the amount of input and output space that we give + // to the raw decoder based on the information we have + // (or don't have) from Block Header. + const size_t in_stop = *in_pos + (size_t)my_min( + in_size - *in_pos, + coder->compressed_limit - coder->compressed_size); + const size_t out_stop = *out_pos + (size_t)my_min( + out_size - *out_pos, + coder->uncompressed_limit - coder->uncompressed_size); + const lzma_ret ret = coder->next.code(coder->next.coder, - allocator, in, in_pos, in_size, - out, out_pos, out_size, action); + allocator, in, in_pos, in_stop, + out, out_pos, out_stop, action); const size_t in_used = *in_pos - in_start; const size_t out_used = *out_pos - out_start; - // NOTE: We compare to compressed_limit here, which prevents - // the total size of the Block growing past LZMA_VLI_MAX. - if (update_size(&coder->compressed_size, in_used, - coder->compressed_limit) - || update_size(&coder->uncompressed_size, - out_used, - coder->block->uncompressed_size)) - return LZMA_DATA_ERROR; + // Because we have limited the input and output sizes, + // we know that these cannot grow too big or overflow. + coder->compressed_size += in_used; + coder->uncompressed_size += out_used; + + if (ret == LZMA_OK) { + const bool comp_done = coder->compressed_size + == coder->block->compressed_size; + const bool uncomp_done = coder->uncompressed_size + == coder->block->uncompressed_size; + + // If both input and output amounts match the sizes + // in Block Header but we still got LZMA_OK instead + // of LZMA_STREAM_END, the file is broken. + if (comp_done && uncomp_done) + return LZMA_DATA_ERROR; + + // If the decoder has consumed all the input that it + // needs but it still couldn't fill the output buffer + // or return LZMA_STREAM_END, the file is broken. + if (comp_done && *out_pos < out_size) + return LZMA_DATA_ERROR; + + // If the decoder has produced all the output but + // it still didn't return LZMA_STREAM_END or consume + // more input (for example, detecting an end of + // payload marker may need more input but produce + // no output) the file is broken. + if (uncomp_done && *in_pos < in_size) + return LZMA_DATA_ERROR; + } if (!coder->ignore_check) lzma_check_update(&coder->check, coder->block->check, out + out_start, out_used); if (ret != LZMA_STREAM_END) return ret; // Compressed and Uncompressed Sizes are now at their final // values. Verify that they match the values given to us. if (!is_size_valid(coder->compressed_size, coder->block->compressed_size) || !is_size_valid(coder->uncompressed_size, coder->block->uncompressed_size)) return LZMA_DATA_ERROR; // Copy the values into coder->block. The caller // may use this information to construct Index. coder->block->compressed_size = coder->compressed_size; coder->block->uncompressed_size = coder->uncompressed_size; coder->sequence = SEQ_PADDING; } // Fall through case SEQ_PADDING: // Compressed Data is padded to a multiple of four bytes. while (coder->compressed_size & 3) { if (*in_pos >= in_size) return LZMA_OK; // We use compressed_size here just get the Padding // right. The actual Compressed Size was stored to // coder->block already, and won't be modified by // us anymore. ++coder->compressed_size; if (in[(*in_pos)++] != 0x00) return LZMA_DATA_ERROR; } if (coder->block->check == LZMA_CHECK_NONE) return LZMA_STREAM_END; if (!coder->ignore_check) lzma_check_finish(&coder->check, coder->block->check); coder->sequence = SEQ_CHECK; // Fall through case SEQ_CHECK: { const size_t check_size = lzma_check_size(coder->block->check); lzma_bufcpy(in, in_pos, in_size, coder->block->raw_check, &coder->check_pos, check_size); if (coder->check_pos < check_size) return LZMA_OK; // Validate the Check only if we support it. // coder->check.buffer may be uninitialized // when the Check ID is not supported. if (!coder->ignore_check && lzma_check_is_supported(coder->block->check) && memcmp(coder->block->raw_check, coder->check.buffer.u8, check_size) != 0) return LZMA_DATA_ERROR; return LZMA_STREAM_END; } } return LZMA_PROG_ERROR; } static void block_decoder_end(void *coder_ptr, const lzma_allocator *allocator) { lzma_block_coder *coder = coder_ptr; lzma_next_end(&coder->next, allocator); lzma_free(coder, allocator); return; } extern lzma_ret lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, lzma_block *block) { lzma_next_coder_init(&lzma_block_decoder_init, next, allocator); // Validate the options. lzma_block_unpadded_size() does that for us // except for Uncompressed Size and filters. Filters are validated // by the raw decoder. if (lzma_block_unpadded_size(block) == 0 || !lzma_vli_is_valid(block->uncompressed_size)) return LZMA_PROG_ERROR; // Allocate *next->coder if needed. lzma_block_coder *coder = next->coder; if (coder == NULL) { coder = lzma_alloc(sizeof(lzma_block_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; next->coder = coder; next->code = &block_decode; next->end = &block_decoder_end; coder->next = LZMA_NEXT_CODER_INIT; } // Basic initializations coder->sequence = SEQ_CODE; coder->block = block; coder->compressed_size = 0; coder->uncompressed_size = 0; // If Compressed Size is not known, we calculate the maximum allowed // value so that encoded size of the Block (including Block Padding) // is still a valid VLI and a multiple of four. coder->compressed_limit = block->compressed_size == LZMA_VLI_UNKNOWN ? (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) - block->header_size - lzma_check_size(block->check) : block->compressed_size; + // With Uncompressed Size this is simpler. If Block Header lacks + // the size info, then LZMA_VLI_MAX is the maximum possible + // Uncompressed Size. + coder->uncompressed_limit + = block->uncompressed_size == LZMA_VLI_UNKNOWN + ? LZMA_VLI_MAX + : block->uncompressed_size; + // Initialize the check. It's caller's problem if the Check ID is not // supported, and the Block decoder cannot verify the Check field. // Caller can test lzma_check_is_supported(block->check). coder->check_pos = 0; lzma_check_init(&coder->check, block->check); coder->ignore_check = block->version >= 1 ? block->ignore_check : false; // Initialize the filter chain. return lzma_raw_decoder_init(&coder->next, allocator, block->filters); } extern LZMA_API(lzma_ret) lzma_block_decoder(lzma_stream *strm, lzma_block *block) { lzma_next_strm_init(lzma_block_decoder_init, strm, block); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } diff --git a/contrib/xz/src/liblzma/common/common.c b/contrib/xz/src/liblzma/common/common.c index cf714e5e43b4..10fc884099a4 100644 --- a/contrib/xz/src/liblzma/common/common.c +++ b/contrib/xz/src/liblzma/common/common.c @@ -1,449 +1,463 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file common.c /// \brief Common functions needed in many places in liblzma // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "common.h" ///////////// // Version // ///////////// extern LZMA_API(uint32_t) lzma_version_number(void) { return LZMA_VERSION; } extern LZMA_API(const char *) lzma_version_string(void) { return LZMA_VERSION_STRING; } /////////////////////// // Memory allocation // /////////////////////// extern void * lzma_attribute((__malloc__)) lzma_attr_alloc_size(1) lzma_alloc(size_t size, const lzma_allocator *allocator) { // Some malloc() variants return NULL if called with size == 0. if (size == 0) size = 1; void *ptr; if (allocator != NULL && allocator->alloc != NULL) ptr = allocator->alloc(allocator->opaque, 1, size); else ptr = malloc(size); return ptr; } extern void * lzma_attribute((__malloc__)) lzma_attr_alloc_size(1) lzma_alloc_zero(size_t size, const lzma_allocator *allocator) { // Some calloc() variants return NULL if called with size == 0. if (size == 0) size = 1; void *ptr; if (allocator != NULL && allocator->alloc != NULL) { ptr = allocator->alloc(allocator->opaque, 1, size); if (ptr != NULL) memzero(ptr, size); } else { ptr = calloc(1, size); } return ptr; } extern void lzma_free(void *ptr, const lzma_allocator *allocator) { if (allocator != NULL && allocator->free != NULL) allocator->free(allocator->opaque, ptr); else free(ptr); return; } ////////// // Misc // ////////// extern size_t lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size) { const size_t in_avail = in_size - *in_pos; const size_t out_avail = out_size - *out_pos; const size_t copy_size = my_min(in_avail, out_avail); // Call memcpy() only if there is something to copy. If there is // nothing to copy, in or out might be NULL and then the memcpy() // call would trigger undefined behavior. if (copy_size > 0) memcpy(out + *out_pos, in + *in_pos, copy_size); *in_pos += copy_size; *out_pos += copy_size; return copy_size; } extern lzma_ret lzma_next_filter_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters) { lzma_next_coder_init(filters[0].init, next, allocator); next->id = filters[0].id; return filters[0].init == NULL ? LZMA_OK : filters[0].init(next, allocator, filters); } extern lzma_ret lzma_next_filter_update(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *reversed_filters) { // Check that the application isn't trying to change the Filter ID. // End of filters is indicated with LZMA_VLI_UNKNOWN in both // reversed_filters[0].id and next->id. if (reversed_filters[0].id != next->id) return LZMA_PROG_ERROR; if (reversed_filters[0].id == LZMA_VLI_UNKNOWN) return LZMA_OK; assert(next->update != NULL); return next->update(next->coder, allocator, NULL, reversed_filters); } extern void lzma_next_end(lzma_next_coder *next, const lzma_allocator *allocator) { if (next->init != (uintptr_t)(NULL)) { // To avoid tiny end functions that simply call // lzma_free(coder, allocator), we allow leaving next->end // NULL and call lzma_free() here. if (next->end != NULL) next->end(next->coder, allocator); else lzma_free(next->coder, allocator); // Reset the variables so the we don't accidentally think // that it is an already initialized coder. *next = LZMA_NEXT_CODER_INIT; } return; } ////////////////////////////////////// // External to internal API wrapper // ////////////////////////////////////// extern lzma_ret lzma_strm_init(lzma_stream *strm) { if (strm == NULL) return LZMA_PROG_ERROR; if (strm->internal == NULL) { strm->internal = lzma_alloc(sizeof(lzma_internal), strm->allocator); if (strm->internal == NULL) return LZMA_MEM_ERROR; strm->internal->next = LZMA_NEXT_CODER_INIT; } memzero(strm->internal->supported_actions, sizeof(strm->internal->supported_actions)); strm->internal->sequence = ISEQ_RUN; strm->internal->allow_buf_error = false; strm->total_in = 0; strm->total_out = 0; return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_code(lzma_stream *strm, lzma_action action) { // Sanity checks if ((strm->next_in == NULL && strm->avail_in != 0) || (strm->next_out == NULL && strm->avail_out != 0) || strm->internal == NULL || strm->internal->next.code == NULL || (unsigned int)(action) > LZMA_ACTION_MAX || !strm->internal->supported_actions[action]) return LZMA_PROG_ERROR; // Check if unsupported members have been set to non-zero or non-NULL, // which would indicate that some new feature is wanted. if (strm->reserved_ptr1 != NULL || strm->reserved_ptr2 != NULL || strm->reserved_ptr3 != NULL || strm->reserved_ptr4 != NULL || strm->reserved_int1 != 0 || strm->reserved_int2 != 0 || strm->reserved_int3 != 0 || strm->reserved_int4 != 0 || strm->reserved_enum1 != LZMA_RESERVED_ENUM || strm->reserved_enum2 != LZMA_RESERVED_ENUM) return LZMA_OPTIONS_ERROR; switch (strm->internal->sequence) { case ISEQ_RUN: switch (action) { case LZMA_RUN: break; case LZMA_SYNC_FLUSH: strm->internal->sequence = ISEQ_SYNC_FLUSH; break; case LZMA_FULL_FLUSH: strm->internal->sequence = ISEQ_FULL_FLUSH; break; case LZMA_FINISH: strm->internal->sequence = ISEQ_FINISH; break; case LZMA_FULL_BARRIER: strm->internal->sequence = ISEQ_FULL_BARRIER; break; } break; case ISEQ_SYNC_FLUSH: // The same action must be used until we return // LZMA_STREAM_END, and the amount of input must not change. if (action != LZMA_SYNC_FLUSH || strm->internal->avail_in != strm->avail_in) return LZMA_PROG_ERROR; break; case ISEQ_FULL_FLUSH: if (action != LZMA_FULL_FLUSH || strm->internal->avail_in != strm->avail_in) return LZMA_PROG_ERROR; break; case ISEQ_FINISH: if (action != LZMA_FINISH || strm->internal->avail_in != strm->avail_in) return LZMA_PROG_ERROR; break; case ISEQ_FULL_BARRIER: if (action != LZMA_FULL_BARRIER || strm->internal->avail_in != strm->avail_in) return LZMA_PROG_ERROR; break; case ISEQ_END: return LZMA_STREAM_END; case ISEQ_ERROR: default: return LZMA_PROG_ERROR; } size_t in_pos = 0; size_t out_pos = 0; lzma_ret ret = strm->internal->next.code( strm->internal->next.coder, strm->allocator, strm->next_in, &in_pos, strm->avail_in, strm->next_out, &out_pos, strm->avail_out, action); strm->next_in += in_pos; strm->avail_in -= in_pos; strm->total_in += in_pos; strm->next_out += out_pos; strm->avail_out -= out_pos; strm->total_out += out_pos; strm->internal->avail_in = strm->avail_in; // Cast is needed to silence a warning about LZMA_TIMED_OUT, which // isn't part of lzma_ret enumeration. switch ((unsigned int)(ret)) { case LZMA_OK: // Don't return LZMA_BUF_ERROR when it happens the first time. // This is to avoid returning LZMA_BUF_ERROR when avail_out // was zero but still there was no more data left to written // to next_out. if (out_pos == 0 && in_pos == 0) { if (strm->internal->allow_buf_error) ret = LZMA_BUF_ERROR; else strm->internal->allow_buf_error = true; } else { strm->internal->allow_buf_error = false; } break; case LZMA_TIMED_OUT: strm->internal->allow_buf_error = false; ret = LZMA_OK; break; case LZMA_STREAM_END: if (strm->internal->sequence == ISEQ_SYNC_FLUSH || strm->internal->sequence == ISEQ_FULL_FLUSH || strm->internal->sequence == ISEQ_FULL_BARRIER) strm->internal->sequence = ISEQ_RUN; else strm->internal->sequence = ISEQ_END; // Fall through case LZMA_NO_CHECK: case LZMA_UNSUPPORTED_CHECK: case LZMA_GET_CHECK: case LZMA_MEMLIMIT_ERROR: // Something else than LZMA_OK, but not a fatal error, // that is, coding may be continued (except if ISEQ_END). strm->internal->allow_buf_error = false; break; default: // All the other errors are fatal; coding cannot be continued. assert(ret != LZMA_BUF_ERROR); strm->internal->sequence = ISEQ_ERROR; break; } return ret; } extern LZMA_API(void) lzma_end(lzma_stream *strm) { if (strm != NULL && strm->internal != NULL) { lzma_next_end(&strm->internal->next, strm->allocator); lzma_free(strm->internal, strm->allocator); strm->internal = NULL; } return; } +#ifdef HAVE_SYMBOL_VERSIONS_LINUX +// This is for compatibility with binaries linked against liblzma that +// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7. +LZMA_SYMVER_API("lzma_get_progress@XZ_5.2.2", + void, lzma_get_progress_522)(lzma_stream *strm, + uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow + __attribute__((__alias__("lzma_get_progress_52"))); + +LZMA_SYMVER_API("lzma_get_progress@@XZ_5.2", + void, lzma_get_progress_52)(lzma_stream *strm, + uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow; + +#define lzma_get_progress lzma_get_progress_52 +#endif extern LZMA_API(void) lzma_get_progress(lzma_stream *strm, uint64_t *progress_in, uint64_t *progress_out) { if (strm->internal->next.get_progress != NULL) { strm->internal->next.get_progress(strm->internal->next.coder, progress_in, progress_out); } else { *progress_in = strm->total_in; *progress_out = strm->total_out; } return; } extern LZMA_API(lzma_check) lzma_get_check(const lzma_stream *strm) { // Return LZMA_CHECK_NONE if we cannot know the check type. // It's a bug in the application if this happens. if (strm->internal->next.get_check == NULL) return LZMA_CHECK_NONE; return strm->internal->next.get_check(strm->internal->next.coder); } extern LZMA_API(uint64_t) lzma_memusage(const lzma_stream *strm) { uint64_t memusage; uint64_t old_memlimit; if (strm == NULL || strm->internal == NULL || strm->internal->next.memconfig == NULL || strm->internal->next.memconfig( strm->internal->next.coder, &memusage, &old_memlimit, 0) != LZMA_OK) return 0; return memusage; } extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm) { uint64_t old_memlimit; uint64_t memusage; if (strm == NULL || strm->internal == NULL || strm->internal->next.memconfig == NULL || strm->internal->next.memconfig( strm->internal->next.coder, &memusage, &old_memlimit, 0) != LZMA_OK) return 0; return old_memlimit; } extern LZMA_API(lzma_ret) lzma_memlimit_set(lzma_stream *strm, uint64_t new_memlimit) { // Dummy variables to simplify memconfig functions uint64_t old_memlimit; uint64_t memusage; if (strm == NULL || strm->internal == NULL || strm->internal->next.memconfig == NULL) return LZMA_PROG_ERROR; // Zero is a special value that cannot be used as an actual limit. // If 0 was specified, use 1 instead. if (new_memlimit == 0) new_memlimit = 1; return strm->internal->next.memconfig(strm->internal->next.coder, &memusage, &old_memlimit, new_memlimit); } diff --git a/contrib/xz/src/liblzma/common/common.h b/contrib/xz/src/liblzma/common/common.h index b3d3b7a059b1..33928c3d2b33 100644 --- a/contrib/xz/src/liblzma/common/common.h +++ b/contrib/xz/src/liblzma/common/common.h @@ -1,314 +1,342 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file common.h /// \brief Definitions common to the whole liblzma library // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_COMMON_H #define LZMA_COMMON_H #include "sysdefs.h" #include "mythread.h" #include "tuklib_integer.h" #if defined(_WIN32) || defined(__CYGWIN__) # ifdef DLL_EXPORT # define LZMA_API_EXPORT __declspec(dllexport) # else # define LZMA_API_EXPORT # endif // Don't use ifdef or defined() below. #elif HAVE_VISIBILITY # define LZMA_API_EXPORT __attribute__((__visibility__("default"))) #else # define LZMA_API_EXPORT #endif #define LZMA_API(type) LZMA_API_EXPORT type LZMA_API_CALL #include "lzma.h" +#ifdef HAVE_SYMBOL_VERSIONS_LINUX +// To keep link-time optimization (LTO, -flto) working with GCC, +// the __symver__ attribute must be used instead of __asm__(".symver ..."). +// Otherwise the symbol versions may be lost, resulting in broken liblzma +// that has wrong default versions in the exported symbol list! +// The attribute was added in GCC 10; LTO with older GCC is not supported. +// +// To keep -Wmissing-prototypes happy, use LZMA_SYMVER_API only with function +// declarations (including those with __alias__ attribute) and LZMA_API with +// the function definitions. This means a little bit of silly copy-and-paste +// between declarations and definitions though. +// +// As of GCC 12.2, the __symver__ attribute supports only @ and @@ but the +// very convenient @@@ isn't supported (it's supported by GNU assembler +// since 2000). When using @@ instead of @@@, the internal name must not be +// the same as the external name to avoid problems in some situations. This +// is why "#define foo_52 foo" is needed for the default symbol versions. +# if TUKLIB_GNUC_REQ(10, 0) && !defined(__INTEL_COMPILER) +# define LZMA_SYMVER_API(extnamever, type, intname) \ + extern __attribute__((__symver__(extnamever))) \ + LZMA_API(type) intname +# else +# define LZMA_SYMVER_API(extnamever, type, intname) \ + __asm__(".symver " #intname "," extnamever); \ + extern LZMA_API(type) intname +# endif +#endif + // These allow helping the compiler in some often-executed branches, whose // result is almost always the same. #ifdef __GNUC__ # define likely(expr) __builtin_expect(expr, true) # define unlikely(expr) __builtin_expect(expr, false) #else # define likely(expr) (expr) # define unlikely(expr) (expr) #endif /// Size of temporary buffers needed in some filters #define LZMA_BUFFER_SIZE 4096 /// Maximum number of worker threads within one multithreaded component. /// The limit exists solely to make it simpler to prevent integer overflows /// when allocating structures etc. This should be big enough for now... /// the code won't scale anywhere close to this number anyway. #define LZMA_THREADS_MAX 16384 /// Starting value for memory usage estimates. Instead of calculating size /// of _every_ structure and taking into account malloc() overhead etc., we /// add a base size to all memory usage estimates. It's not very accurate /// but should be easily good enough. #define LZMA_MEMUSAGE_BASE (UINT64_C(1) << 15) /// Start of internal Filter ID space. These IDs must never be used /// in Streams. #define LZMA_FILTER_RESERVED_START (LZMA_VLI_C(1) << 62) /// Supported flags that can be passed to lzma_stream_decoder() /// or lzma_auto_decoder(). #define LZMA_SUPPORTED_FLAGS \ ( LZMA_TELL_NO_CHECK \ | LZMA_TELL_UNSUPPORTED_CHECK \ | LZMA_TELL_ANY_CHECK \ | LZMA_IGNORE_CHECK \ | LZMA_CONCATENATED ) /// Largest valid lzma_action value as unsigned integer. #define LZMA_ACTION_MAX ((unsigned int)(LZMA_FULL_BARRIER)) /// Special return value (lzma_ret) to indicate that a timeout was reached /// and lzma_code() must not return LZMA_BUF_ERROR. This is converted to /// LZMA_OK in lzma_code(). This is not in the lzma_ret enumeration because /// there's no need to have it in the public API. #define LZMA_TIMED_OUT 32 typedef struct lzma_next_coder_s lzma_next_coder; typedef struct lzma_filter_info_s lzma_filter_info; /// Type of a function used to initialize a filter encoder or decoder typedef lzma_ret (*lzma_init_function)( lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters); /// Type of a function to do some kind of coding work (filters, Stream, /// Block encoders/decoders etc.). Some special coders use don't use both /// input and output buffers, but for simplicity they still use this same /// function prototype. typedef lzma_ret (*lzma_code_function)( void *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action); /// Type of a function to free the memory allocated for the coder typedef void (*lzma_end_function)( void *coder, const lzma_allocator *allocator); /// Raw coder validates and converts an array of lzma_filter structures to /// an array of lzma_filter_info structures. This array is used with /// lzma_next_filter_init to initialize the filter chain. struct lzma_filter_info_s { /// Filter ID. This is used only by the encoder /// with lzma_filters_update(). lzma_vli id; /// Pointer to function used to initialize the filter. /// This is NULL to indicate end of array. lzma_init_function init; /// Pointer to filter's options structure void *options; }; /// Hold data and function pointers of the next filter in the chain. struct lzma_next_coder_s { /// Pointer to coder-specific data void *coder; /// Filter ID. This is LZMA_VLI_UNKNOWN when this structure doesn't /// point to a filter coder. lzma_vli id; /// "Pointer" to init function. This is never called here. /// We need only to detect if we are initializing a coder /// that was allocated earlier. See lzma_next_coder_init and /// lzma_next_strm_init macros in this file. uintptr_t init; /// Pointer to function to do the actual coding lzma_code_function code; /// Pointer to function to free lzma_next_coder.coder. This can /// be NULL; in that case, lzma_free is called to free /// lzma_next_coder.coder. lzma_end_function end; /// Pointer to a function to get progress information. If this is NULL, /// lzma_stream.total_in and .total_out are used instead. void (*get_progress)(void *coder, uint64_t *progress_in, uint64_t *progress_out); /// Pointer to function to return the type of the integrity check. /// Most coders won't support this. lzma_check (*get_check)(const void *coder); /// Pointer to function to get and/or change the memory usage limit. /// If new_memlimit == 0, the limit is not changed. lzma_ret (*memconfig)(void *coder, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit); /// Update the filter-specific options or the whole filter chain /// in the encoder. lzma_ret (*update)(void *coder, const lzma_allocator *allocator, const lzma_filter *filters, const lzma_filter *reversed_filters); }; /// Macro to initialize lzma_next_coder structure #define LZMA_NEXT_CODER_INIT \ (lzma_next_coder){ \ .coder = NULL, \ .init = (uintptr_t)(NULL), \ .id = LZMA_VLI_UNKNOWN, \ .code = NULL, \ .end = NULL, \ .get_progress = NULL, \ .get_check = NULL, \ .memconfig = NULL, \ .update = NULL, \ } /// Internal data for lzma_strm_init, lzma_code, and lzma_end. A pointer to /// this is stored in lzma_stream. struct lzma_internal_s { /// The actual coder that should do something useful lzma_next_coder next; /// Track the state of the coder. This is used to validate arguments /// so that the actual coders can rely on e.g. that LZMA_SYNC_FLUSH /// is used on every call to lzma_code until next.code has returned /// LZMA_STREAM_END. enum { ISEQ_RUN, ISEQ_SYNC_FLUSH, ISEQ_FULL_FLUSH, ISEQ_FINISH, ISEQ_FULL_BARRIER, ISEQ_END, ISEQ_ERROR, } sequence; /// A copy of lzma_stream avail_in. This is used to verify that the /// amount of input doesn't change once e.g. LZMA_FINISH has been /// used. size_t avail_in; /// Indicates which lzma_action values are allowed by next.code. bool supported_actions[LZMA_ACTION_MAX + 1]; /// If true, lzma_code will return LZMA_BUF_ERROR if no progress was /// made (no input consumed and no output produced by next.code). bool allow_buf_error; }; /// Allocates memory extern void *lzma_alloc(size_t size, const lzma_allocator *allocator) lzma_attribute((__malloc__)) lzma_attr_alloc_size(1); /// Allocates memory and zeroes it (like calloc()). This can be faster /// than lzma_alloc() + memzero() while being backward compatible with /// custom allocators. extern void * lzma_attribute((__malloc__)) lzma_attr_alloc_size(1) lzma_alloc_zero(size_t size, const lzma_allocator *allocator); /// Frees memory extern void lzma_free(void *ptr, const lzma_allocator *allocator); /// Allocates strm->internal if it is NULL, and initializes *strm and /// strm->internal. This function is only called via lzma_next_strm_init macro. extern lzma_ret lzma_strm_init(lzma_stream *strm); /// Initializes the next filter in the chain, if any. This takes care of /// freeing the memory of previously initialized filter if it is different /// than the filter being initialized now. This way the actual filter /// initialization functions don't need to use lzma_next_coder_init macro. extern lzma_ret lzma_next_filter_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters); /// Update the next filter in the chain, if any. This checks that /// the application is not trying to change the Filter IDs. extern lzma_ret lzma_next_filter_update( lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *reversed_filters); /// Frees the memory allocated for next->coder either using next->end or, /// if next->end is NULL, using lzma_free. extern void lzma_next_end(lzma_next_coder *next, const lzma_allocator *allocator); /// Copy as much data as possible from in[] to out[] and update *in_pos /// and *out_pos accordingly. Returns the number of bytes copied. extern size_t lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size); /// \brief Return if expression doesn't evaluate to LZMA_OK /// /// There are several situations where we want to return immediately /// with the value of expr if it isn't LZMA_OK. This macro shortens /// the code a little. #define return_if_error(expr) \ do { \ const lzma_ret ret_ = (expr); \ if (ret_ != LZMA_OK) \ return ret_; \ } while (0) /// If next isn't already initialized, free the previous coder. Then mark /// that next is _possibly_ initialized for the coder using this macro. /// "Possibly" means that if e.g. allocation of next->coder fails, the /// structure isn't actually initialized for this coder, but leaving /// next->init to func is still OK. #define lzma_next_coder_init(func, next, allocator) \ do { \ if ((uintptr_t)(func) != (next)->init) \ lzma_next_end(next, allocator); \ (next)->init = (uintptr_t)(func); \ } while (0) /// Initializes lzma_strm and calls func() to initialize strm->internal->next. /// (The function being called will use lzma_next_coder_init()). If /// initialization fails, memory that wasn't freed by func() is freed /// along strm->internal. #define lzma_next_strm_init(func, strm, ...) \ do { \ return_if_error(lzma_strm_init(strm)); \ const lzma_ret ret_ = func(&(strm)->internal->next, \ (strm)->allocator, __VA_ARGS__); \ if (ret_ != LZMA_OK) { \ lzma_end(strm); \ return ret_; \ } \ } while (0) #endif diff --git a/contrib/xz/src/liblzma/common/filter_common.c b/contrib/xz/src/liblzma/common/filter_common.c index 9ad5d5d8e2af..590be7303feb 100644 --- a/contrib/xz/src/liblzma/common/filter_common.c +++ b/contrib/xz/src/liblzma/common/filter_common.c @@ -1,337 +1,343 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file filter_common.c /// \brief Filter-specific stuff common for both encoder and decoder // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "filter_common.h" static const struct { /// Filter ID lzma_vli id; /// Size of the filter-specific options structure size_t options_size; /// True if it is OK to use this filter as non-last filter in /// the chain. bool non_last_ok; /// True if it is OK to use this filter as the last filter in /// the chain. bool last_ok; /// True if the filter may change the size of the data (that is, the /// amount of encoded output can be different than the amount of /// uncompressed input). bool changes_size; } features[] = { #if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) { .id = LZMA_FILTER_LZMA1, .options_size = sizeof(lzma_options_lzma), .non_last_ok = false, .last_ok = true, .changes_size = true, }, #endif #if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) { .id = LZMA_FILTER_LZMA2, .options_size = sizeof(lzma_options_lzma), .non_last_ok = false, .last_ok = true, .changes_size = true, }, #endif #if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86) { .id = LZMA_FILTER_X86, .options_size = sizeof(lzma_options_bcj), .non_last_ok = true, .last_ok = false, .changes_size = false, }, #endif #if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC) { .id = LZMA_FILTER_POWERPC, .options_size = sizeof(lzma_options_bcj), .non_last_ok = true, .last_ok = false, .changes_size = false, }, #endif #if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64) { .id = LZMA_FILTER_IA64, .options_size = sizeof(lzma_options_bcj), .non_last_ok = true, .last_ok = false, .changes_size = false, }, #endif #if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM) { .id = LZMA_FILTER_ARM, .options_size = sizeof(lzma_options_bcj), .non_last_ok = true, .last_ok = false, .changes_size = false, }, #endif #if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB) { .id = LZMA_FILTER_ARMTHUMB, .options_size = sizeof(lzma_options_bcj), .non_last_ok = true, .last_ok = false, .changes_size = false, }, #endif #if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC) { .id = LZMA_FILTER_SPARC, .options_size = sizeof(lzma_options_bcj), .non_last_ok = true, .last_ok = false, .changes_size = false, }, #endif #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) { .id = LZMA_FILTER_DELTA, .options_size = sizeof(lzma_options_delta), .non_last_ok = true, .last_ok = false, .changes_size = false, }, #endif { .id = LZMA_VLI_UNKNOWN } }; extern LZMA_API(lzma_ret) -lzma_filters_copy(const lzma_filter *src, lzma_filter *dest, +lzma_filters_copy(const lzma_filter *src, lzma_filter *real_dest, const lzma_allocator *allocator) { - if (src == NULL || dest == NULL) + if (src == NULL || real_dest == NULL) return LZMA_PROG_ERROR; + // Use a temporary destination so that the real destination + // will never be modied if an error occurs. + lzma_filter dest[LZMA_FILTERS_MAX + 1]; + lzma_ret ret; size_t i; for (i = 0; src[i].id != LZMA_VLI_UNKNOWN; ++i) { // There must be a maximum of four filters plus // the array terminator. if (i == LZMA_FILTERS_MAX) { ret = LZMA_OPTIONS_ERROR; goto error; } dest[i].id = src[i].id; if (src[i].options == NULL) { dest[i].options = NULL; } else { // See if the filter is supported only when the // options is not NULL. This might be convenient // sometimes if the app is actually copying only // a partial filter chain with a place holder ID. // // When options is not NULL, the Filter ID must be // supported by us, because otherwise we don't know // how big the options are. size_t j; for (j = 0; src[i].id != features[j].id; ++j) { if (features[j].id == LZMA_VLI_UNKNOWN) { ret = LZMA_OPTIONS_ERROR; goto error; } } // Allocate and copy the options. dest[i].options = lzma_alloc(features[j].options_size, allocator); if (dest[i].options == NULL) { ret = LZMA_MEM_ERROR; goto error; } memcpy(dest[i].options, src[i].options, features[j].options_size); } } // Terminate the filter array. - assert(i <= LZMA_FILTERS_MAX + 1); + assert(i < LZMA_FILTERS_MAX + 1); dest[i].id = LZMA_VLI_UNKNOWN; dest[i].options = NULL; + // Copy it to the caller-supplied array now that we know that + // no errors occurred. + memcpy(real_dest, dest, (i + 1) * sizeof(lzma_filter)); + return LZMA_OK; error: // Free the options which we have already allocated. - while (i-- > 0) { + while (i-- > 0) lzma_free(dest[i].options, allocator); - dest[i].options = NULL; - } return ret; } static lzma_ret validate_chain(const lzma_filter *filters, size_t *count) { // There must be at least one filter. if (filters == NULL || filters[0].id == LZMA_VLI_UNKNOWN) return LZMA_PROG_ERROR; // Number of non-last filters that may change the size of the data // significantly (that is, more than 1-2 % or so). size_t changes_size_count = 0; // True if it is OK to add a new filter after the current filter. bool non_last_ok = true; // True if the last filter in the given chain is actually usable as // the last filter. Only filters that support embedding End of Payload // Marker can be used as the last filter in the chain. bool last_ok = false; size_t i = 0; do { size_t j; for (j = 0; filters[i].id != features[j].id; ++j) if (features[j].id == LZMA_VLI_UNKNOWN) return LZMA_OPTIONS_ERROR; // If the previous filter in the chain cannot be a non-last // filter, the chain is invalid. if (!non_last_ok) return LZMA_OPTIONS_ERROR; non_last_ok = features[j].non_last_ok; last_ok = features[j].last_ok; changes_size_count += features[j].changes_size; } while (filters[++i].id != LZMA_VLI_UNKNOWN); // There must be 1-4 filters. The last filter must be usable as // the last filter in the chain. A maximum of three filters are // allowed to change the size of the data. if (i > LZMA_FILTERS_MAX || !last_ok || changes_size_count > 3) return LZMA_OPTIONS_ERROR; *count = i; return LZMA_OK; } extern lzma_ret lzma_raw_coder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter *options, lzma_filter_find coder_find, bool is_encoder) { // Do some basic validation and get the number of filters. size_t count; return_if_error(validate_chain(options, &count)); // Set the filter functions and copy the options pointer. lzma_filter_info filters[LZMA_FILTERS_MAX + 1]; if (is_encoder) { for (size_t i = 0; i < count; ++i) { // The order of the filters is reversed in the // encoder. It allows more efficient handling // of the uncompressed data. const size_t j = count - i - 1; const lzma_filter_coder *const fc = coder_find(options[i].id); if (fc == NULL || fc->init == NULL) return LZMA_OPTIONS_ERROR; filters[j].id = options[i].id; filters[j].init = fc->init; filters[j].options = options[i].options; } } else { for (size_t i = 0; i < count; ++i) { const lzma_filter_coder *const fc = coder_find(options[i].id); if (fc == NULL || fc->init == NULL) return LZMA_OPTIONS_ERROR; filters[i].id = options[i].id; filters[i].init = fc->init; filters[i].options = options[i].options; } } // Terminate the array. filters[count].id = LZMA_VLI_UNKNOWN; filters[count].init = NULL; // Initialize the filters. const lzma_ret ret = lzma_next_filter_init(next, allocator, filters); if (ret != LZMA_OK) lzma_next_end(next, allocator); return ret; } extern uint64_t lzma_raw_coder_memusage(lzma_filter_find coder_find, const lzma_filter *filters) { // The chain has to have at least one filter. { size_t tmp; if (validate_chain(filters, &tmp) != LZMA_OK) return UINT64_MAX; } uint64_t total = 0; size_t i = 0; do { const lzma_filter_coder *const fc = coder_find(filters[i].id); if (fc == NULL) return UINT64_MAX; // Unsupported Filter ID if (fc->memusage == NULL) { // This filter doesn't have a function to calculate // the memory usage and validate the options. Such // filters need only little memory, so we use 1 KiB // as a good estimate. They also accept all possible // options, so there's no need to worry about lack // of validation. total += 1024; } else { // Call the filter-specific memory usage calculation // function. const uint64_t usage = fc->memusage(filters[i].options); if (usage == UINT64_MAX) return UINT64_MAX; // Invalid options total += usage; } } while (filters[++i].id != LZMA_VLI_UNKNOWN); // Add some fixed amount of extra. It's to compensate memory usage // of Stream, Block etc. coders, malloc() overhead, stack etc. return total + LZMA_MEMUSAGE_BASE; } diff --git a/contrib/xz/src/liblzma/common/hardware_cputhreads.c b/contrib/xz/src/liblzma/common/hardware_cputhreads.c index f468366a6045..5d246d2cc083 100644 --- a/contrib/xz/src/liblzma/common/hardware_cputhreads.c +++ b/contrib/xz/src/liblzma/common/hardware_cputhreads.c @@ -1,22 +1,34 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file hardware_cputhreads.c /// \brief Get the number of CPU threads or cores // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "common.h" #include "tuklib_cpucores.h" +#ifdef HAVE_SYMBOL_VERSIONS_LINUX +// This is for compatibility with binaries linked against liblzma that +// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7. +LZMA_SYMVER_API("lzma_cputhreads@XZ_5.2.2", + uint32_t, lzma_cputhreads_522)(void) lzma_nothrow + __attribute__((__alias__("lzma_cputhreads_52"))); + +LZMA_SYMVER_API("lzma_cputhreads@@XZ_5.2", + uint32_t, lzma_cputhreads_52)(void) lzma_nothrow; + +#define lzma_cputhreads lzma_cputhreads_52 +#endif extern LZMA_API(uint32_t) lzma_cputhreads(void) { return tuklib_cpucores(); } diff --git a/contrib/xz/src/liblzma/common/index.c b/contrib/xz/src/liblzma/common/index.c index a41e8f33083b..24ec3c10c2e9 100644 --- a/contrib/xz/src/liblzma/common/index.c +++ b/contrib/xz/src/liblzma/common/index.c @@ -1,1250 +1,1262 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file index.c /// \brief Handling of .xz Indexes and some other Stream information // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "index.h" #include "stream_flags_common.h" /// \brief How many Records to allocate at once /// /// This should be big enough to avoid making lots of tiny allocations /// but small enough to avoid too much unused memory at once. #define INDEX_GROUP_SIZE 512 /// \brief How many Records can be allocated at once at maximum #define PREALLOC_MAX ((SIZE_MAX - sizeof(index_group)) / sizeof(index_record)) /// \brief Base structure for index_stream and index_group structures typedef struct index_tree_node_s index_tree_node; struct index_tree_node_s { /// Uncompressed start offset of this Stream (relative to the /// beginning of the file) or Block (relative to the beginning /// of the Stream) lzma_vli uncompressed_base; /// Compressed start offset of this Stream or Block lzma_vli compressed_base; index_tree_node *parent; index_tree_node *left; index_tree_node *right; }; /// \brief AVL tree to hold index_stream or index_group structures typedef struct { /// Root node index_tree_node *root; /// Leftmost node. Since the tree will be filled sequentially, /// this won't change after the first node has been added to /// the tree. index_tree_node *leftmost; /// The rightmost node in the tree. Since the tree is filled /// sequentially, this is always the node where to add the new data. index_tree_node *rightmost; /// Number of nodes in the tree uint32_t count; } index_tree; typedef struct { lzma_vli uncompressed_sum; lzma_vli unpadded_sum; } index_record; typedef struct { /// Every Record group is part of index_stream.groups tree. index_tree_node node; /// Number of Blocks in this Stream before this group. lzma_vli number_base; /// Number of Records that can be put in records[]. size_t allocated; /// Index of the last Record in use. size_t last; /// The sizes in this array are stored as cumulative sums relative /// to the beginning of the Stream. This makes it possible to /// use binary search in lzma_index_locate(). /// /// Note that the cumulative summing is done specially for /// unpadded_sum: The previous value is rounded up to the next /// multiple of four before adding the Unpadded Size of the new /// Block. The total encoded size of the Blocks in the Stream /// is records[last].unpadded_sum in the last Record group of /// the Stream. /// /// For example, if the Unpadded Sizes are 39, 57, and 81, the /// stored values are 39, 97 (40 + 57), and 181 (100 + 181). /// The total encoded size of these Blocks is 184. /// /// This is a flexible array, because it makes easy to optimize /// memory usage in case someone concatenates many Streams that /// have only one or few Blocks. index_record records[]; } index_group; typedef struct { /// Every index_stream is a node in the tree of Streams. index_tree_node node; /// Number of this Stream (first one is 1) uint32_t number; /// Total number of Blocks before this Stream lzma_vli block_number_base; /// Record groups of this Stream are stored in a tree. /// It's a T-tree with AVL-tree balancing. There are /// INDEX_GROUP_SIZE Records per node by default. /// This keeps the number of memory allocations reasonable /// and finding a Record is fast. index_tree groups; /// Number of Records in this Stream lzma_vli record_count; /// Size of the List of Records field in this Stream. This is used /// together with record_count to calculate the size of the Index /// field and thus the total size of the Stream. lzma_vli index_list_size; /// Stream Flags of this Stream. This is meaningful only if /// the Stream Flags have been told us with lzma_index_stream_flags(). /// Initially stream_flags.version is set to UINT32_MAX to indicate /// that the Stream Flags are unknown. lzma_stream_flags stream_flags; /// Amount of Stream Padding after this Stream. This defaults to /// zero and can be set with lzma_index_stream_padding(). lzma_vli stream_padding; } index_stream; struct lzma_index_s { /// AVL-tree containing the Stream(s). Often there is just one /// Stream, but using a tree keeps lookups fast even when there /// are many concatenated Streams. index_tree streams; /// Uncompressed size of all the Blocks in the Stream(s) lzma_vli uncompressed_size; /// Total size of all the Blocks in the Stream(s) lzma_vli total_size; /// Total number of Records in all Streams in this lzma_index lzma_vli record_count; /// Size of the List of Records field if all the Streams in this /// lzma_index were packed into a single Stream (makes it simpler to /// take many .xz files and combine them into a single Stream). /// /// This value together with record_count is needed to calculate /// Backward Size that is stored into Stream Footer. lzma_vli index_list_size; /// How many Records to allocate at once in lzma_index_append(). /// This defaults to INDEX_GROUP_SIZE but can be overridden with /// lzma_index_prealloc(). size_t prealloc; /// Bitmask indicating what integrity check types have been used /// as set by lzma_index_stream_flags(). The bit of the last Stream /// is not included here, since it is possible to change it by /// calling lzma_index_stream_flags() again. uint32_t checks; }; static void index_tree_init(index_tree *tree) { tree->root = NULL; tree->leftmost = NULL; tree->rightmost = NULL; tree->count = 0; return; } /// Helper for index_tree_end() static void index_tree_node_end(index_tree_node *node, const lzma_allocator *allocator, void (*free_func)(void *node, const lzma_allocator *allocator)) { // The tree won't ever be very huge, so recursion should be fine. // 20 levels in the tree is likely quite a lot already in practice. if (node->left != NULL) index_tree_node_end(node->left, allocator, free_func); if (node->right != NULL) index_tree_node_end(node->right, allocator, free_func); free_func(node, allocator); return; } /// Free the memory allocated for a tree. Each node is freed using the /// given free_func which is either &lzma_free or &index_stream_end. /// The latter is used to free the Record groups from each index_stream /// before freeing the index_stream itself. static void index_tree_end(index_tree *tree, const lzma_allocator *allocator, void (*free_func)(void *node, const lzma_allocator *allocator)) { assert(free_func != NULL); if (tree->root != NULL) index_tree_node_end(tree->root, allocator, free_func); return; } /// Add a new node to the tree. node->uncompressed_base and /// node->compressed_base must have been set by the caller already. static void index_tree_append(index_tree *tree, index_tree_node *node) { node->parent = tree->rightmost; node->left = NULL; node->right = NULL; ++tree->count; // Handle the special case of adding the first node. if (tree->root == NULL) { tree->root = node; tree->leftmost = node; tree->rightmost = node; return; } // The tree is always filled sequentially. assert(tree->rightmost->uncompressed_base <= node->uncompressed_base); assert(tree->rightmost->compressed_base < node->compressed_base); // Add the new node after the rightmost node. It's the correct // place due to the reason above. tree->rightmost->right = node; tree->rightmost = node; // Balance the AVL-tree if needed. We don't need to keep the balance // factors in nodes, because we always fill the tree sequentially, // and thus know the state of the tree just by looking at the node // count. From the node count we can calculate how many steps to go // up in the tree to find the rotation root. uint32_t up = tree->count ^ (UINT32_C(1) << bsr32(tree->count)); if (up != 0) { // Locate the root node for the rotation. up = ctz32(tree->count) + 2; do { node = node->parent; } while (--up > 0); // Rotate left using node as the rotation root. index_tree_node *pivot = node->right; if (node->parent == NULL) { tree->root = pivot; } else { assert(node->parent->right == node); node->parent->right = pivot; } pivot->parent = node->parent; node->right = pivot->left; if (node->right != NULL) node->right->parent = node; pivot->left = node; node->parent = pivot; } return; } /// Get the next node in the tree. Return NULL if there are no more nodes. static void * index_tree_next(const index_tree_node *node) { if (node->right != NULL) { node = node->right; while (node->left != NULL) node = node->left; return (void *)(node); } while (node->parent != NULL && node->parent->right == node) node = node->parent; return (void *)(node->parent); } /// Locate a node that contains the given uncompressed offset. It is /// caller's job to check that target is not bigger than the uncompressed /// size of the tree (the last node would be returned in that case still). static void * index_tree_locate(const index_tree *tree, lzma_vli target) { const index_tree_node *result = NULL; const index_tree_node *node = tree->root; assert(tree->leftmost == NULL || tree->leftmost->uncompressed_base == 0); // Consecutive nodes may have the same uncompressed_base. // We must pick the rightmost one. while (node != NULL) { if (node->uncompressed_base > target) { node = node->left; } else { result = node; node = node->right; } } return (void *)(result); } /// Allocate and initialize a new Stream using the given base offsets. static index_stream * index_stream_init(lzma_vli compressed_base, lzma_vli uncompressed_base, uint32_t stream_number, lzma_vli block_number_base, const lzma_allocator *allocator) { index_stream *s = lzma_alloc(sizeof(index_stream), allocator); if (s == NULL) return NULL; s->node.uncompressed_base = uncompressed_base; s->node.compressed_base = compressed_base; s->node.parent = NULL; s->node.left = NULL; s->node.right = NULL; s->number = stream_number; s->block_number_base = block_number_base; index_tree_init(&s->groups); s->record_count = 0; s->index_list_size = 0; s->stream_flags.version = UINT32_MAX; s->stream_padding = 0; return s; } /// Free the memory allocated for a Stream and its Record groups. static void index_stream_end(void *node, const lzma_allocator *allocator) { index_stream *s = node; index_tree_end(&s->groups, allocator, &lzma_free); lzma_free(s, allocator); return; } static lzma_index * index_init_plain(const lzma_allocator *allocator) { lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); if (i != NULL) { index_tree_init(&i->streams); i->uncompressed_size = 0; i->total_size = 0; i->record_count = 0; i->index_list_size = 0; i->prealloc = INDEX_GROUP_SIZE; i->checks = 0; } return i; } extern LZMA_API(lzma_index *) lzma_index_init(const lzma_allocator *allocator) { lzma_index *i = index_init_plain(allocator); if (i == NULL) return NULL; index_stream *s = index_stream_init(0, 0, 1, 0, allocator); if (s == NULL) { lzma_free(i, allocator); return NULL; } index_tree_append(&i->streams, &s->node); return i; } extern LZMA_API(void) lzma_index_end(lzma_index *i, const lzma_allocator *allocator) { // NOTE: If you modify this function, check also the bottom // of lzma_index_cat(). if (i != NULL) { index_tree_end(&i->streams, allocator, &index_stream_end); lzma_free(i, allocator); } return; } extern void lzma_index_prealloc(lzma_index *i, lzma_vli records) { if (records > PREALLOC_MAX) records = PREALLOC_MAX; i->prealloc = (size_t)(records); return; } extern LZMA_API(uint64_t) lzma_index_memusage(lzma_vli streams, lzma_vli blocks) { // This calculates an upper bound that is only a little bit // bigger than the exact maximum memory usage with the given // parameters. // Typical malloc() overhead is 2 * sizeof(void *) but we take // a little bit extra just in case. Using LZMA_MEMUSAGE_BASE // instead would give too inaccurate estimate. const size_t alloc_overhead = 4 * sizeof(void *); // Amount of memory needed for each Stream base structures. // We assume that every Stream has at least one Block and // thus at least one group. const size_t stream_base = sizeof(index_stream) + sizeof(index_group) + 2 * alloc_overhead; // Amount of memory needed per group. const size_t group_base = sizeof(index_group) + INDEX_GROUP_SIZE * sizeof(index_record) + alloc_overhead; // Number of groups. There may actually be more, but that overhead // has been taken into account in stream_base already. const lzma_vli groups = (blocks + INDEX_GROUP_SIZE - 1) / INDEX_GROUP_SIZE; // Memory used by index_stream and index_group structures. const uint64_t streams_mem = streams * stream_base; const uint64_t groups_mem = groups * group_base; // Memory used by the base structure. const uint64_t index_base = sizeof(lzma_index) + alloc_overhead; // Validate the arguments and catch integer overflows. // Maximum number of Streams is "only" UINT32_MAX, because // that limit is used by the tree containing the Streams. const uint64_t limit = UINT64_MAX - index_base; if (streams == 0 || streams > UINT32_MAX || blocks > LZMA_VLI_MAX || streams > limit / stream_base || groups > limit / group_base || limit - streams_mem < groups_mem) return UINT64_MAX; return index_base + streams_mem + groups_mem; } extern LZMA_API(uint64_t) lzma_index_memused(const lzma_index *i) { return lzma_index_memusage(i->streams.count, i->record_count); } extern LZMA_API(lzma_vli) lzma_index_block_count(const lzma_index *i) { return i->record_count; } extern LZMA_API(lzma_vli) lzma_index_stream_count(const lzma_index *i) { return i->streams.count; } extern LZMA_API(lzma_vli) lzma_index_size(const lzma_index *i) { return index_size(i->record_count, i->index_list_size); } extern LZMA_API(lzma_vli) lzma_index_total_size(const lzma_index *i) { return i->total_size; } extern LZMA_API(lzma_vli) lzma_index_stream_size(const lzma_index *i) { // Stream Header + Blocks + Index + Stream Footer return LZMA_STREAM_HEADER_SIZE + i->total_size + index_size(i->record_count, i->index_list_size) + LZMA_STREAM_HEADER_SIZE; } static lzma_vli index_file_size(lzma_vli compressed_base, lzma_vli unpadded_sum, lzma_vli record_count, lzma_vli index_list_size, lzma_vli stream_padding) { // Earlier Streams and Stream Paddings + Stream Header // + Blocks + Index + Stream Footer + Stream Padding // // This might go over LZMA_VLI_MAX due to too big unpadded_sum // when this function is used in lzma_index_append(). lzma_vli file_size = compressed_base + 2 * LZMA_STREAM_HEADER_SIZE + stream_padding + vli_ceil4(unpadded_sum); if (file_size > LZMA_VLI_MAX) return LZMA_VLI_UNKNOWN; // The same applies here. file_size += index_size(record_count, index_list_size); if (file_size > LZMA_VLI_MAX) return LZMA_VLI_UNKNOWN; return file_size; } extern LZMA_API(lzma_vli) lzma_index_file_size(const lzma_index *i) { const index_stream *s = (const index_stream *)(i->streams.rightmost); const index_group *g = (const index_group *)(s->groups.rightmost); return index_file_size(s->node.compressed_base, g == NULL ? 0 : g->records[g->last].unpadded_sum, s->record_count, s->index_list_size, s->stream_padding); } extern LZMA_API(lzma_vli) lzma_index_uncompressed_size(const lzma_index *i) { return i->uncompressed_size; } extern LZMA_API(uint32_t) lzma_index_checks(const lzma_index *i) { uint32_t checks = i->checks; // Get the type of the Check of the last Stream too. const index_stream *s = (const index_stream *)(i->streams.rightmost); if (s->stream_flags.version != UINT32_MAX) checks |= UINT32_C(1) << s->stream_flags.check; return checks; } extern uint32_t lzma_index_padding_size(const lzma_index *i) { return (LZMA_VLI_C(4) - index_size_unpadded( i->record_count, i->index_list_size)) & 3; } extern LZMA_API(lzma_ret) lzma_index_stream_flags(lzma_index *i, const lzma_stream_flags *stream_flags) { if (i == NULL || stream_flags == NULL) return LZMA_PROG_ERROR; // Validate the Stream Flags. return_if_error(lzma_stream_flags_compare( stream_flags, stream_flags)); index_stream *s = (index_stream *)(i->streams.rightmost); s->stream_flags = *stream_flags; return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_index_stream_padding(lzma_index *i, lzma_vli stream_padding) { if (i == NULL || stream_padding > LZMA_VLI_MAX || (stream_padding & 3) != 0) return LZMA_PROG_ERROR; index_stream *s = (index_stream *)(i->streams.rightmost); // Check that the new value won't make the file grow too big. const lzma_vli old_stream_padding = s->stream_padding; s->stream_padding = 0; if (lzma_index_file_size(i) + stream_padding > LZMA_VLI_MAX) { s->stream_padding = old_stream_padding; return LZMA_DATA_ERROR; } s->stream_padding = stream_padding; return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_index_append(lzma_index *i, const lzma_allocator *allocator, lzma_vli unpadded_size, lzma_vli uncompressed_size) { // Validate. if (i == NULL || unpadded_size < UNPADDED_SIZE_MIN || unpadded_size > UNPADDED_SIZE_MAX || uncompressed_size > LZMA_VLI_MAX) return LZMA_PROG_ERROR; index_stream *s = (index_stream *)(i->streams.rightmost); index_group *g = (index_group *)(s->groups.rightmost); const lzma_vli compressed_base = g == NULL ? 0 : vli_ceil4(g->records[g->last].unpadded_sum); const lzma_vli uncompressed_base = g == NULL ? 0 : g->records[g->last].uncompressed_sum; const uint32_t index_list_size_add = lzma_vli_size(unpadded_size) + lzma_vli_size(uncompressed_size); + // Check that uncompressed size will not overflow. + if (uncompressed_base + uncompressed_size > LZMA_VLI_MAX) + return LZMA_DATA_ERROR; + // Check that the file size will stay within limits. if (index_file_size(s->node.compressed_base, compressed_base + unpadded_size, s->record_count + 1, s->index_list_size + index_list_size_add, s->stream_padding) == LZMA_VLI_UNKNOWN) return LZMA_DATA_ERROR; // The size of the Index field must not exceed the maximum value // that can be stored in the Backward Size field. if (index_size(i->record_count + 1, i->index_list_size + index_list_size_add) > LZMA_BACKWARD_SIZE_MAX) return LZMA_DATA_ERROR; if (g != NULL && g->last + 1 < g->allocated) { // There is space in the last group at least for one Record. ++g->last; } else { // We need to allocate a new group. g = lzma_alloc(sizeof(index_group) + i->prealloc * sizeof(index_record), allocator); if (g == NULL) return LZMA_MEM_ERROR; g->last = 0; g->allocated = i->prealloc; // Reset prealloc so that if the application happens to // add new Records, the allocation size will be sane. i->prealloc = INDEX_GROUP_SIZE; // Set the start offsets of this group. g->node.uncompressed_base = uncompressed_base; g->node.compressed_base = compressed_base; g->number_base = s->record_count + 1; // Add the new group to the Stream. index_tree_append(&s->groups, &g->node); } // Add the new Record to the group. g->records[g->last].uncompressed_sum = uncompressed_base + uncompressed_size; g->records[g->last].unpadded_sum = compressed_base + unpadded_size; // Update the totals. ++s->record_count; s->index_list_size += index_list_size_add; i->total_size += vli_ceil4(unpadded_size); i->uncompressed_size += uncompressed_size; ++i->record_count; i->index_list_size += index_list_size_add; return LZMA_OK; } /// Structure to pass info to index_cat_helper() typedef struct { /// Uncompressed size of the destination lzma_vli uncompressed_size; /// Compressed file size of the destination lzma_vli file_size; /// Same as above but for Block numbers lzma_vli block_number_add; /// Number of Streams that were in the destination index before we /// started appending new Streams from the source index. This is /// used to fix the Stream numbering. uint32_t stream_number_add; /// Destination index' Stream tree index_tree *streams; } index_cat_info; /// Add the Stream nodes from the source index to dest using recursion. /// Simplest iterative traversal of the source tree wouldn't work, because /// we update the pointers in nodes when moving them to the destination tree. static void index_cat_helper(const index_cat_info *info, index_stream *this) { index_stream *left = (index_stream *)(this->node.left); index_stream *right = (index_stream *)(this->node.right); if (left != NULL) index_cat_helper(info, left); this->node.uncompressed_base += info->uncompressed_size; this->node.compressed_base += info->file_size; this->number += info->stream_number_add; this->block_number_base += info->block_number_add; index_tree_append(info->streams, &this->node); if (right != NULL) index_cat_helper(info, right); return; } extern LZMA_API(lzma_ret) lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, const lzma_allocator *allocator) { + if (dest == NULL || src == NULL) + return LZMA_PROG_ERROR; + const lzma_vli dest_file_size = lzma_index_file_size(dest); // Check that we don't exceed the file size limits. if (dest_file_size + lzma_index_file_size(src) > LZMA_VLI_MAX || dest->uncompressed_size + src->uncompressed_size > LZMA_VLI_MAX) return LZMA_DATA_ERROR; // Check that the encoded size of the combined lzma_indexes stays // within limits. In theory, this should be done only if we know // that the user plans to actually combine the Streams and thus // construct a single Index (probably rare). However, exceeding // this limit is quite theoretical, so we do this check always // to simplify things elsewhere. { const lzma_vli dest_size = index_size_unpadded( dest->record_count, dest->index_list_size); const lzma_vli src_size = index_size_unpadded( src->record_count, src->index_list_size); if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX) return LZMA_DATA_ERROR; } // Optimize the last group to minimize memory usage. Allocation has // to be done before modifying dest or src. { index_stream *s = (index_stream *)(dest->streams.rightmost); index_group *g = (index_group *)(s->groups.rightmost); if (g != NULL && g->last + 1 < g->allocated) { assert(g->node.left == NULL); assert(g->node.right == NULL); index_group *newg = lzma_alloc(sizeof(index_group) + (g->last + 1) * sizeof(index_record), allocator); if (newg == NULL) return LZMA_MEM_ERROR; newg->node = g->node; newg->allocated = g->last + 1; newg->last = g->last; newg->number_base = g->number_base; memcpy(newg->records, g->records, newg->allocated * sizeof(index_record)); if (g->node.parent != NULL) { assert(g->node.parent->right == &g->node); g->node.parent->right = &newg->node; } if (s->groups.leftmost == &g->node) { assert(s->groups.root == &g->node); s->groups.leftmost = &newg->node; s->groups.root = &newg->node; } assert(s->groups.rightmost == &g->node); s->groups.rightmost = &newg->node; lzma_free(g, allocator); // NOTE: newg isn't leaked here because // newg == (void *)&newg->node. } } + // dest->checks includes the check types of all except the last Stream + // in dest. Set the bit for the check type of the last Stream now so + // that it won't get lost when Stream(s) from src are appended to dest. + dest->checks = lzma_index_checks(dest); + // Add all the Streams from src to dest. Update the base offsets // of each Stream from src. const index_cat_info info = { .uncompressed_size = dest->uncompressed_size, .file_size = dest_file_size, .stream_number_add = dest->streams.count, .block_number_add = dest->record_count, .streams = &dest->streams, }; index_cat_helper(&info, (index_stream *)(src->streams.root)); // Update info about all the combined Streams. dest->uncompressed_size += src->uncompressed_size; dest->total_size += src->total_size; dest->record_count += src->record_count; dest->index_list_size += src->index_list_size; - dest->checks = lzma_index_checks(dest) | src->checks; + dest->checks |= src->checks; // There's nothing else left in src than the base structure. lzma_free(src, allocator); return LZMA_OK; } /// Duplicate an index_stream. static index_stream * index_dup_stream(const index_stream *src, const lzma_allocator *allocator) { // Catch a somewhat theoretical integer overflow. if (src->record_count > PREALLOC_MAX) return NULL; // Allocate and initialize a new Stream. index_stream *dest = index_stream_init(src->node.compressed_base, src->node.uncompressed_base, src->number, src->block_number_base, allocator); if (dest == NULL) return NULL; // Copy the overall information. dest->record_count = src->record_count; dest->index_list_size = src->index_list_size; dest->stream_flags = src->stream_flags; dest->stream_padding = src->stream_padding; // Return if there are no groups to duplicate. if (src->groups.leftmost == NULL) return dest; // Allocate memory for the Records. We put all the Records into // a single group. It's simplest and also tends to make // lzma_index_locate() a little bit faster with very big Indexes. index_group *destg = lzma_alloc(sizeof(index_group) + src->record_count * sizeof(index_record), allocator); if (destg == NULL) { index_stream_end(dest, allocator); return NULL; } // Initialize destg. destg->node.uncompressed_base = 0; destg->node.compressed_base = 0; destg->number_base = 1; destg->allocated = src->record_count; destg->last = src->record_count - 1; // Go through all the groups in src and copy the Records into destg. const index_group *srcg = (const index_group *)(src->groups.leftmost); size_t i = 0; do { memcpy(destg->records + i, srcg->records, (srcg->last + 1) * sizeof(index_record)); i += srcg->last + 1; srcg = index_tree_next(&srcg->node); } while (srcg != NULL); assert(i == destg->allocated); // Add the group to the new Stream. index_tree_append(&dest->groups, &destg->node); return dest; } extern LZMA_API(lzma_index *) lzma_index_dup(const lzma_index *src, const lzma_allocator *allocator) { // Allocate the base structure (no initial Stream). lzma_index *dest = index_init_plain(allocator); if (dest == NULL) return NULL; // Copy the totals. dest->uncompressed_size = src->uncompressed_size; dest->total_size = src->total_size; dest->record_count = src->record_count; dest->index_list_size = src->index_list_size; // Copy the Streams and the groups in them. const index_stream *srcstream = (const index_stream *)(src->streams.leftmost); do { index_stream *deststream = index_dup_stream( srcstream, allocator); if (deststream == NULL) { lzma_index_end(dest, allocator); return NULL; } index_tree_append(&dest->streams, &deststream->node); srcstream = index_tree_next(&srcstream->node); } while (srcstream != NULL); return dest; } /// Indexing for lzma_index_iter.internal[] enum { ITER_INDEX, ITER_STREAM, ITER_GROUP, ITER_RECORD, ITER_METHOD, }; /// Values for lzma_index_iter.internal[ITER_METHOD].s enum { ITER_METHOD_NORMAL, ITER_METHOD_NEXT, ITER_METHOD_LEFTMOST, }; static void iter_set_info(lzma_index_iter *iter) { const lzma_index *i = iter->internal[ITER_INDEX].p; const index_stream *stream = iter->internal[ITER_STREAM].p; const index_group *group = iter->internal[ITER_GROUP].p; const size_t record = iter->internal[ITER_RECORD].s; // lzma_index_iter.internal must not contain a pointer to the last // group in the index, because that may be reallocated by // lzma_index_cat(). if (group == NULL) { // There are no groups. assert(stream->groups.root == NULL); iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST; } else if (i->streams.rightmost != &stream->node || stream->groups.rightmost != &group->node) { // The group is not not the last group in the index. iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL; } else if (stream->groups.leftmost != &group->node) { // The group isn't the only group in the Stream, thus we // know that it must have a parent group i.e. it's not // the root node. assert(stream->groups.root != &group->node); assert(group->node.parent->right == &group->node); iter->internal[ITER_METHOD].s = ITER_METHOD_NEXT; iter->internal[ITER_GROUP].p = group->node.parent; } else { // The Stream has only one group. assert(stream->groups.root == &group->node); assert(group->node.parent == NULL); iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST; iter->internal[ITER_GROUP].p = NULL; } // NOTE: lzma_index_iter.stream.number is lzma_vli but we use uint32_t // internally. iter->stream.number = stream->number; iter->stream.block_count = stream->record_count; iter->stream.compressed_offset = stream->node.compressed_base; iter->stream.uncompressed_offset = stream->node.uncompressed_base; // iter->stream.flags will be NULL if the Stream Flags haven't been // set with lzma_index_stream_flags(). iter->stream.flags = stream->stream_flags.version == UINT32_MAX ? NULL : &stream->stream_flags; iter->stream.padding = stream->stream_padding; if (stream->groups.rightmost == NULL) { // Stream has no Blocks. iter->stream.compressed_size = index_size(0, 0) + 2 * LZMA_STREAM_HEADER_SIZE; iter->stream.uncompressed_size = 0; } else { const index_group *g = (const index_group *)( stream->groups.rightmost); // Stream Header + Stream Footer + Index + Blocks iter->stream.compressed_size = 2 * LZMA_STREAM_HEADER_SIZE + index_size(stream->record_count, stream->index_list_size) + vli_ceil4(g->records[g->last].unpadded_sum); iter->stream.uncompressed_size = g->records[g->last].uncompressed_sum; } if (group != NULL) { iter->block.number_in_stream = group->number_base + record; iter->block.number_in_file = iter->block.number_in_stream + stream->block_number_base; iter->block.compressed_stream_offset = record == 0 ? group->node.compressed_base : vli_ceil4(group->records[ record - 1].unpadded_sum); iter->block.uncompressed_stream_offset = record == 0 ? group->node.uncompressed_base : group->records[record - 1].uncompressed_sum; iter->block.uncompressed_size = group->records[record].uncompressed_sum - iter->block.uncompressed_stream_offset; iter->block.unpadded_size = group->records[record].unpadded_sum - iter->block.compressed_stream_offset; iter->block.total_size = vli_ceil4(iter->block.unpadded_size); iter->block.compressed_stream_offset += LZMA_STREAM_HEADER_SIZE; iter->block.compressed_file_offset = iter->block.compressed_stream_offset + iter->stream.compressed_offset; iter->block.uncompressed_file_offset = iter->block.uncompressed_stream_offset + iter->stream.uncompressed_offset; } return; } extern LZMA_API(void) lzma_index_iter_init(lzma_index_iter *iter, const lzma_index *i) { iter->internal[ITER_INDEX].p = i; lzma_index_iter_rewind(iter); return; } extern LZMA_API(void) lzma_index_iter_rewind(lzma_index_iter *iter) { iter->internal[ITER_STREAM].p = NULL; iter->internal[ITER_GROUP].p = NULL; iter->internal[ITER_RECORD].s = 0; iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL; return; } extern LZMA_API(lzma_bool) lzma_index_iter_next(lzma_index_iter *iter, lzma_index_iter_mode mode) { // Catch unsupported mode values. if ((unsigned int)(mode) > LZMA_INDEX_ITER_NONEMPTY_BLOCK) return true; const lzma_index *i = iter->internal[ITER_INDEX].p; const index_stream *stream = iter->internal[ITER_STREAM].p; const index_group *group = NULL; size_t record = iter->internal[ITER_RECORD].s; // If we are being asked for the next Stream, leave group to NULL // so that the rest of the this function thinks that this Stream // has no groups and will thus go to the next Stream. if (mode != LZMA_INDEX_ITER_STREAM) { // Get the pointer to the current group. See iter_set_inf() // for explanation. switch (iter->internal[ITER_METHOD].s) { case ITER_METHOD_NORMAL: group = iter->internal[ITER_GROUP].p; break; case ITER_METHOD_NEXT: group = index_tree_next(iter->internal[ITER_GROUP].p); break; case ITER_METHOD_LEFTMOST: group = (const index_group *)( stream->groups.leftmost); break; } } again: if (stream == NULL) { // We at the beginning of the lzma_index. // Locate the first Stream. stream = (const index_stream *)(i->streams.leftmost); if (mode >= LZMA_INDEX_ITER_BLOCK) { // Since we are being asked to return information // about the first a Block, skip Streams that have // no Blocks. while (stream->groups.leftmost == NULL) { stream = index_tree_next(&stream->node); if (stream == NULL) return true; } } // Start from the first Record in the Stream. group = (const index_group *)(stream->groups.leftmost); record = 0; } else if (group != NULL && record < group->last) { // The next Record is in the same group. ++record; } else { // This group has no more Records or this Stream has // no Blocks at all. record = 0; // If group is not NULL, this Stream has at least one Block // and thus at least one group. Find the next group. if (group != NULL) group = index_tree_next(&group->node); if (group == NULL) { // This Stream has no more Records. Find the next // Stream. If we are being asked to return information // about a Block, we skip empty Streams. do { stream = index_tree_next(&stream->node); if (stream == NULL) return true; } while (mode >= LZMA_INDEX_ITER_BLOCK && stream->groups.leftmost == NULL); group = (const index_group *)( stream->groups.leftmost); } } if (mode == LZMA_INDEX_ITER_NONEMPTY_BLOCK) { // We need to look for the next Block again if this Block // is empty. if (record == 0) { if (group->node.uncompressed_base == group->records[0].uncompressed_sum) goto again; } else if (group->records[record - 1].uncompressed_sum == group->records[record].uncompressed_sum) { goto again; } } iter->internal[ITER_STREAM].p = stream; iter->internal[ITER_GROUP].p = group; iter->internal[ITER_RECORD].s = record; iter_set_info(iter); return false; } extern LZMA_API(lzma_bool) lzma_index_iter_locate(lzma_index_iter *iter, lzma_vli target) { const lzma_index *i = iter->internal[ITER_INDEX].p; // If the target is past the end of the file, return immediately. if (i->uncompressed_size <= target) return true; // Locate the Stream containing the target offset. const index_stream *stream = index_tree_locate(&i->streams, target); assert(stream != NULL); target -= stream->node.uncompressed_base; // Locate the group containing the target offset. const index_group *group = index_tree_locate(&stream->groups, target); assert(group != NULL); // Use binary search to locate the exact Record. It is the first // Record whose uncompressed_sum is greater than target. - // This is because we want the rightmost Record that fullfills the + // This is because we want the rightmost Record that fulfills the // search criterion. It is possible that there are empty Blocks; // we don't want to return them. size_t left = 0; size_t right = group->last; while (left < right) { const size_t pos = left + (right - left) / 2; if (group->records[pos].uncompressed_sum <= target) left = pos + 1; else right = pos; } iter->internal[ITER_STREAM].p = stream; iter->internal[ITER_GROUP].p = group; iter->internal[ITER_RECORD].s = left; iter_set_info(iter); return false; } diff --git a/contrib/xz/src/liblzma/common/index_hash.c b/contrib/xz/src/liblzma/common/index_hash.c index d7a0344b76c3..5c9d314039da 100644 --- a/contrib/xz/src/liblzma/common/index_hash.c +++ b/contrib/xz/src/liblzma/common/index_hash.c @@ -1,334 +1,333 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file index_hash.c /// \brief Validates Index by using a hash function // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "common.h" #include "index.h" #include "check.h" typedef struct { /// Sum of the Block sizes (including Block Padding) lzma_vli blocks_size; /// Sum of the Uncompressed Size fields lzma_vli uncompressed_size; /// Number of Records lzma_vli count; /// Size of the List of Index Records as bytes lzma_vli index_list_size; /// Check calculated from Unpadded Sizes and Uncompressed Sizes. lzma_check_state check; } lzma_index_hash_info; struct lzma_index_hash_s { enum { SEQ_BLOCK, SEQ_COUNT, SEQ_UNPADDED, SEQ_UNCOMPRESSED, SEQ_PADDING_INIT, SEQ_PADDING, SEQ_CRC32, } sequence; /// Information collected while decoding the actual Blocks. lzma_index_hash_info blocks; /// Information collected from the Index field. lzma_index_hash_info records; /// Number of Records not fully decoded lzma_vli remaining; /// Unpadded Size currently being read from an Index Record. lzma_vli unpadded_size; /// Uncompressed Size currently being read from an Index Record. lzma_vli uncompressed_size; /// Position in variable-length integers when decoding them from /// the List of Records. size_t pos; /// CRC32 of the Index uint32_t crc32; }; extern LZMA_API(lzma_index_hash *) lzma_index_hash_init(lzma_index_hash *index_hash, const lzma_allocator *allocator) { if (index_hash == NULL) { index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); if (index_hash == NULL) return NULL; } index_hash->sequence = SEQ_BLOCK; index_hash->blocks.blocks_size = 0; index_hash->blocks.uncompressed_size = 0; index_hash->blocks.count = 0; index_hash->blocks.index_list_size = 0; index_hash->records.blocks_size = 0; index_hash->records.uncompressed_size = 0; index_hash->records.count = 0; index_hash->records.index_list_size = 0; index_hash->unpadded_size = 0; index_hash->uncompressed_size = 0; index_hash->pos = 0; index_hash->crc32 = 0; // These cannot fail because LZMA_CHECK_BEST is known to be supported. (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST); (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST); return index_hash; } extern LZMA_API(void) lzma_index_hash_end(lzma_index_hash *index_hash, const lzma_allocator *allocator) { lzma_free(index_hash, allocator); return; } extern LZMA_API(lzma_vli) lzma_index_hash_size(const lzma_index_hash *index_hash) { // Get the size of the Index from ->blocks instead of ->records for // cases where application wants to know the Index Size before // decoding the Index. return index_size(index_hash->blocks.count, index_hash->blocks.index_list_size); } /// Updates the sizes and the hash without any validation. -static lzma_ret +static void hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size, lzma_vli uncompressed_size) { info->blocks_size += vli_ceil4(unpadded_size); info->uncompressed_size += uncompressed_size; info->index_list_size += lzma_vli_size(unpadded_size) + lzma_vli_size(uncompressed_size); ++info->count; const lzma_vli sizes[2] = { unpadded_size, uncompressed_size }; lzma_check_update(&info->check, LZMA_CHECK_BEST, (const uint8_t *)(sizes), sizeof(sizes)); - return LZMA_OK; + return; } extern LZMA_API(lzma_ret) lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size, lzma_vli uncompressed_size) { // Validate the arguments. if (index_hash->sequence != SEQ_BLOCK || unpadded_size < UNPADDED_SIZE_MIN || unpadded_size > UNPADDED_SIZE_MAX || uncompressed_size > LZMA_VLI_MAX) return LZMA_PROG_ERROR; // Update the hash. - return_if_error(hash_append(&index_hash->blocks, - unpadded_size, uncompressed_size)); + hash_append(&index_hash->blocks, unpadded_size, uncompressed_size); // Validate the properties of *info are still in allowed limits. if (index_hash->blocks.blocks_size > LZMA_VLI_MAX || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX || index_size(index_hash->blocks.count, index_hash->blocks.index_list_size) > LZMA_BACKWARD_SIZE_MAX || index_stream_size(index_hash->blocks.blocks_size, index_hash->blocks.count, index_hash->blocks.index_list_size) > LZMA_VLI_MAX) return LZMA_DATA_ERROR; return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, size_t *in_pos, size_t in_size) { // Catch zero input buffer here, because in contrast to Index encoder // and decoder functions, applications call this function directly // instead of via lzma_code(), which does the buffer checking. if (*in_pos >= in_size) return LZMA_BUF_ERROR; // NOTE: This function has many similarities to index_encode() and // index_decode() functions found from index_encoder.c and // index_decoder.c. See the comments especially in index_encoder.c. const size_t in_start = *in_pos; lzma_ret ret = LZMA_OK; while (*in_pos < in_size) switch (index_hash->sequence) { case SEQ_BLOCK: // Check the Index Indicator is present. if (in[(*in_pos)++] != 0x00) return LZMA_DATA_ERROR; index_hash->sequence = SEQ_COUNT; break; case SEQ_COUNT: { ret = lzma_vli_decode(&index_hash->remaining, &index_hash->pos, in, in_pos, in_size); if (ret != LZMA_STREAM_END) goto out; // The count must match the count of the Blocks decoded. if (index_hash->remaining != index_hash->blocks.count) return LZMA_DATA_ERROR; ret = LZMA_OK; index_hash->pos = 0; // Handle the special case when there are no Blocks. index_hash->sequence = index_hash->remaining == 0 ? SEQ_PADDING_INIT : SEQ_UNPADDED; break; } case SEQ_UNPADDED: case SEQ_UNCOMPRESSED: { lzma_vli *size = index_hash->sequence == SEQ_UNPADDED ? &index_hash->unpadded_size : &index_hash->uncompressed_size; ret = lzma_vli_decode(size, &index_hash->pos, in, in_pos, in_size); if (ret != LZMA_STREAM_END) goto out; ret = LZMA_OK; index_hash->pos = 0; if (index_hash->sequence == SEQ_UNPADDED) { if (index_hash->unpadded_size < UNPADDED_SIZE_MIN || index_hash->unpadded_size > UNPADDED_SIZE_MAX) return LZMA_DATA_ERROR; index_hash->sequence = SEQ_UNCOMPRESSED; } else { // Update the hash. - return_if_error(hash_append(&index_hash->records, + hash_append(&index_hash->records, index_hash->unpadded_size, - index_hash->uncompressed_size)); + index_hash->uncompressed_size); // Verify that we don't go over the known sizes. Note // that this validation is simpler than the one used // in lzma_index_hash_append(), because here we know // that values in index_hash->blocks are already // validated and we are fine as long as we don't // exceed them in index_hash->records. if (index_hash->blocks.blocks_size < index_hash->records.blocks_size || index_hash->blocks.uncompressed_size < index_hash->records.uncompressed_size || index_hash->blocks.index_list_size < index_hash->records.index_list_size) return LZMA_DATA_ERROR; // Check if this was the last Record. index_hash->sequence = --index_hash->remaining == 0 ? SEQ_PADDING_INIT : SEQ_UNPADDED; } break; } case SEQ_PADDING_INIT: index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded( index_hash->records.count, index_hash->records.index_list_size)) & 3; index_hash->sequence = SEQ_PADDING; // Fall through case SEQ_PADDING: if (index_hash->pos > 0) { --index_hash->pos; if (in[(*in_pos)++] != 0x00) return LZMA_DATA_ERROR; break; } // Compare the sizes. if (index_hash->blocks.blocks_size != index_hash->records.blocks_size || index_hash->blocks.uncompressed_size != index_hash->records.uncompressed_size || index_hash->blocks.index_list_size != index_hash->records.index_list_size) return LZMA_DATA_ERROR; // Finish the hashes and compare them. lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST); lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST); if (memcmp(index_hash->blocks.check.buffer.u8, index_hash->records.check.buffer.u8, lzma_check_size(LZMA_CHECK_BEST)) != 0) return LZMA_DATA_ERROR; // Finish the CRC32 calculation. index_hash->crc32 = lzma_crc32(in + in_start, *in_pos - in_start, index_hash->crc32); index_hash->sequence = SEQ_CRC32; // Fall through case SEQ_CRC32: do { if (*in_pos == in_size) return LZMA_OK; if (((index_hash->crc32 >> (index_hash->pos * 8)) & 0xFF) != in[(*in_pos)++]) return LZMA_DATA_ERROR; } while (++index_hash->pos < 4); return LZMA_STREAM_END; default: assert(0); return LZMA_PROG_ERROR; } out: // Update the CRC32, index_hash->crc32 = lzma_crc32(in + in_start, *in_pos - in_start, index_hash->crc32); return ret; } diff --git a/contrib/xz/src/liblzma/common/memcmplen.h b/contrib/xz/src/liblzma/common/memcmplen.h index dcfd8d6f89d1..b76a0b6327ed 100644 --- a/contrib/xz/src/liblzma/common/memcmplen.h +++ b/contrib/xz/src/liblzma/common/memcmplen.h @@ -1,164 +1,163 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file memcmplen.h /// \brief Optimized comparison of two buffers // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_MEMCMPLEN_H #define LZMA_MEMCMPLEN_H #include "common.h" #ifdef HAVE_IMMINTRIN_H # include #endif /// Find out how many equal bytes the two buffers have. /// /// \param buf1 First buffer /// \param buf2 Second buffer /// \param len How many bytes have already been compared and will /// be assumed to match /// \param limit How many bytes to compare at most, including the /// already-compared bytes. This must be significantly /// smaller than UINT32_MAX to avoid integer overflows. /// Up to LZMA_MEMCMPLEN_EXTRA bytes may be read past /// the specified limit from both buf1 and buf2. /// /// \return Number of equal bytes in the buffers is returned. /// This is always at least len and at most limit. /// /// \note LZMA_MEMCMPLEN_EXTRA defines how many extra bytes may be read. /// It's rounded up to 2^n. This extra amount needs to be /// allocated in the buffers being used. It needs to be /// initialized too to keep Valgrind quiet. static inline uint32_t lzma_attribute((__always_inline__)) lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2, uint32_t len, uint32_t limit) { assert(len <= limit); assert(limit <= UINT32_MAX / 2); #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ && ((TUKLIB_GNUC_REQ(3, 4) && defined(__x86_64__)) \ || (defined(__INTEL_COMPILER) && defined(__x86_64__)) \ || (defined(__INTEL_COMPILER) && defined(_M_X64)) \ || (defined(_MSC_VER) && defined(_M_X64))) // NOTE: This will use 64-bit unaligned access which // TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit, but // it's convenient here at least as long as it's x86-64 only. // // I keep this x86-64 only for now since that's where I know this // to be a good method. This may be fine on other 64-bit CPUs too. // On big endian one should use xor instead of subtraction and switch // to __builtin_clzll(). #define LZMA_MEMCMPLEN_EXTRA 8 while (len < limit) { const uint64_t x = read64ne(buf1 + len) - read64ne(buf2 + len); if (x != 0) { # if defined(_M_X64) // MSVC or Intel C compiler on Windows unsigned long tmp; _BitScanForward64(&tmp, x); len += (uint32_t)tmp >> 3; # else // GCC, clang, or Intel C compiler len += (uint32_t)__builtin_ctzll(x) >> 3; # endif return my_min(len, limit); } len += 8; } return limit; #elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ && defined(HAVE__MM_MOVEMASK_EPI8) \ - && ((defined(__GNUC__) && defined(__SSE2_MATH__)) \ - || (defined(__INTEL_COMPILER) && defined(__SSE2__)) \ + && (defined(__SSE2__) \ || (defined(_MSC_VER) && defined(_M_IX86_FP) \ && _M_IX86_FP >= 2)) // NOTE: Like above, this will use 128-bit unaligned access which // TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit. // // SSE2 version for 32-bit and 64-bit x86. On x86-64 the above // version is sometimes significantly faster and sometimes // slightly slower than this SSE2 version, so this SSE2 // version isn't used on x86-64. # define LZMA_MEMCMPLEN_EXTRA 16 while (len < limit) { const uint32_t x = 0xFFFF ^ _mm_movemask_epi8(_mm_cmpeq_epi8( _mm_loadu_si128((const __m128i *)(buf1 + len)), _mm_loadu_si128((const __m128i *)(buf2 + len)))); if (x != 0) { len += ctz32(x); return my_min(len, limit); } len += 16; } return limit; #elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && !defined(WORDS_BIGENDIAN) // Generic 32-bit little endian method # define LZMA_MEMCMPLEN_EXTRA 4 while (len < limit) { uint32_t x = read32ne(buf1 + len) - read32ne(buf2 + len); if (x != 0) { if ((x & 0xFFFF) == 0) { len += 2; x >>= 16; } if ((x & 0xFF) == 0) ++len; return my_min(len, limit); } len += 4; } return limit; #elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && defined(WORDS_BIGENDIAN) // Generic 32-bit big endian method # define LZMA_MEMCMPLEN_EXTRA 4 while (len < limit) { uint32_t x = read32ne(buf1 + len) ^ read32ne(buf2 + len); if (x != 0) { if ((x & 0xFFFF0000) == 0) { len += 2; x <<= 16; } if ((x & 0xFF000000) == 0) ++len; return my_min(len, limit); } len += 4; } return limit; #else // Simple portable version that doesn't use unaligned access. # define LZMA_MEMCMPLEN_EXTRA 0 while (len < limit && buf1[len] == buf2[len]) ++len; return len; #endif } #endif diff --git a/contrib/xz/src/liblzma/common/stream_decoder.c b/contrib/xz/src/liblzma/common/stream_decoder.c index fdd8ff2f9a3e..6885d0c7c8cf 100644 --- a/contrib/xz/src/liblzma/common/stream_decoder.c +++ b/contrib/xz/src/liblzma/common/stream_decoder.c @@ -1,467 +1,475 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file stream_decoder.c /// \brief Decodes .xz Streams // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "stream_decoder.h" #include "block_decoder.h" typedef struct { enum { SEQ_STREAM_HEADER, SEQ_BLOCK_HEADER, - SEQ_BLOCK, + SEQ_BLOCK_INIT, + SEQ_BLOCK_RUN, SEQ_INDEX, SEQ_STREAM_FOOTER, SEQ_STREAM_PADDING, } sequence; - /// Block or Metadata decoder. This takes little memory and the same - /// data structure can be used to decode every Block Header, so it's - /// a good idea to have a separate lzma_next_coder structure for it. + /// Block decoder lzma_next_coder block_decoder; /// Block options decoded by the Block Header decoder and used by /// the Block decoder. lzma_block block_options; /// Stream Flags from Stream Header lzma_stream_flags stream_flags; /// Index is hashed so that it can be compared to the sizes of Blocks /// with O(1) memory usage. lzma_index_hash *index_hash; /// Memory usage limit uint64_t memlimit; /// Amount of memory actually needed (only an estimate) uint64_t memusage; /// If true, LZMA_NO_CHECK is returned if the Stream has /// no integrity check. bool tell_no_check; /// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has /// an integrity check that isn't supported by this liblzma build. bool tell_unsupported_check; /// If true, LZMA_GET_CHECK is returned after decoding Stream Header. bool tell_any_check; /// If true, we will tell the Block decoder to skip calculating /// and verifying the integrity check. bool ignore_check; /// If true, we will decode concatenated Streams that possibly have /// Stream Padding between or after them. LZMA_STREAM_END is returned - /// once the application isn't giving us any new input, and we aren't - /// in the middle of a Stream, and possible Stream Padding is a - /// multiple of four bytes. + /// once the application isn't giving us any new input (LZMA_FINISH), + /// and we aren't in the middle of a Stream, and possible + /// Stream Padding is a multiple of four bytes. bool concatenated; /// When decoding concatenated Streams, this is true as long as we /// are decoding the first Stream. This is needed to avoid misleading /// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic /// bytes. bool first_stream; /// Write position in buffer[] and position in Stream Padding size_t pos; /// Buffer to hold Stream Header, Block Header, and Stream Footer. /// Block Header has biggest maximum size. uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; } lzma_stream_coder; static lzma_ret stream_decoder_reset(lzma_stream_coder *coder, const lzma_allocator *allocator) { // Initialize the Index hash used to verify the Index. coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator); if (coder->index_hash == NULL) return LZMA_MEM_ERROR; // Reset the rest of the variables. coder->sequence = SEQ_STREAM_HEADER; coder->pos = 0; return LZMA_OK; } static lzma_ret stream_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { lzma_stream_coder *coder = coder_ptr; // When decoding the actual Block, it may be able to produce more // output even if we don't give it any new input. while (true) switch (coder->sequence) { case SEQ_STREAM_HEADER: { // Copy the Stream Header to the internal buffer. lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, LZMA_STREAM_HEADER_SIZE); // Return if we didn't get the whole Stream Header yet. if (coder->pos < LZMA_STREAM_HEADER_SIZE) return LZMA_OK; coder->pos = 0; // Decode the Stream Header. const lzma_ret ret = lzma_stream_header_decode( &coder->stream_flags, coder->buffer); if (ret != LZMA_OK) return ret == LZMA_FORMAT_ERROR && !coder->first_stream ? LZMA_DATA_ERROR : ret; // If we are decoding concatenated Streams, and the later // Streams have invalid Header Magic Bytes, we give // LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR. coder->first_stream = false; // Copy the type of the Check so that Block Header and Block // decoders see it. coder->block_options.check = coder->stream_flags.check; // Even if we return LZMA_*_CHECK below, we want // to continue from Block Header decoding. coder->sequence = SEQ_BLOCK_HEADER; // Detect if there's no integrity check or if it is // unsupported if those were requested by the application. if (coder->tell_no_check && coder->stream_flags.check == LZMA_CHECK_NONE) return LZMA_NO_CHECK; if (coder->tell_unsupported_check && !lzma_check_is_supported( coder->stream_flags.check)) return LZMA_UNSUPPORTED_CHECK; if (coder->tell_any_check) return LZMA_GET_CHECK; } // Fall through case SEQ_BLOCK_HEADER: { if (*in_pos >= in_size) return LZMA_OK; if (coder->pos == 0) { // Detect if it's Index. if (in[*in_pos] == 0x00) { coder->sequence = SEQ_INDEX; break; } // Calculate the size of the Block Header. Note that // Block Header decoder wants to see this byte too // so don't advance *in_pos. coder->block_options.header_size = lzma_block_header_size_decode( in[*in_pos]); } // Copy the Block Header to the internal buffer. lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, coder->block_options.header_size); // Return if we didn't get the whole Block Header yet. if (coder->pos < coder->block_options.header_size) return LZMA_OK; coder->pos = 0; + coder->sequence = SEQ_BLOCK_INIT; + } + + // Fall through + + case SEQ_BLOCK_INIT: { + // Checking memusage and doing the initialization needs + // its own sequence point because we need to be able to + // retry if we return LZMA_MEMLIMIT_ERROR. // Version 1 is needed to support the .ignore_check option. coder->block_options.version = 1; // Set up a buffer to hold the filter chain. Block Header // decoder will initialize all members of this array so // we don't need to do it here. lzma_filter filters[LZMA_FILTERS_MAX + 1]; coder->block_options.filters = filters; // Decode the Block Header. return_if_error(lzma_block_header_decode(&coder->block_options, allocator, coder->buffer)); // If LZMA_IGNORE_CHECK was used, this flag needs to be set. // It has to be set after lzma_block_header_decode() because // it always resets this to false. coder->block_options.ignore_check = coder->ignore_check; // Check the memory usage limit. const uint64_t memusage = lzma_raw_decoder_memusage(filters); lzma_ret ret; if (memusage == UINT64_MAX) { // One or more unknown Filter IDs. ret = LZMA_OPTIONS_ERROR; } else { // Now we can set coder->memusage since we know that // the filter chain is valid. We don't want // lzma_memusage() to return UINT64_MAX in case of // invalid filter chain. coder->memusage = memusage; if (memusage > coder->memlimit) { // The chain would need too much memory. ret = LZMA_MEMLIMIT_ERROR; } else { // Memory usage is OK. // Initialize the Block decoder. ret = lzma_block_decoder_init( &coder->block_decoder, allocator, &coder->block_options); } } // Free the allocated filter options since they are needed // only to initialize the Block decoder. for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) lzma_free(filters[i].options, allocator); coder->block_options.filters = NULL; - // Check if memory usage calculation and Block enocoder + // Check if memory usage calculation and Block decoder // initialization succeeded. if (ret != LZMA_OK) return ret; - coder->sequence = SEQ_BLOCK; + coder->sequence = SEQ_BLOCK_RUN; } // Fall through - case SEQ_BLOCK: { + case SEQ_BLOCK_RUN: { const lzma_ret ret = coder->block_decoder.code( coder->block_decoder.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); if (ret != LZMA_STREAM_END) return ret; // Block decoded successfully. Add the new size pair to // the Index hash. return_if_error(lzma_index_hash_append(coder->index_hash, lzma_block_unpadded_size( &coder->block_options), coder->block_options.uncompressed_size)); coder->sequence = SEQ_BLOCK_HEADER; break; } case SEQ_INDEX: { // If we don't have any input, don't call // lzma_index_hash_decode() since it would return // LZMA_BUF_ERROR, which we must not do here. if (*in_pos >= in_size) return LZMA_OK; // Decode the Index and compare it to the hash calculated // from the sizes of the Blocks (if any). const lzma_ret ret = lzma_index_hash_decode(coder->index_hash, in, in_pos, in_size); if (ret != LZMA_STREAM_END) return ret; coder->sequence = SEQ_STREAM_FOOTER; } // Fall through case SEQ_STREAM_FOOTER: { // Copy the Stream Footer to the internal buffer. lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, LZMA_STREAM_HEADER_SIZE); // Return if we didn't get the whole Stream Footer yet. if (coder->pos < LZMA_STREAM_HEADER_SIZE) return LZMA_OK; coder->pos = 0; // Decode the Stream Footer. The decoder gives // LZMA_FORMAT_ERROR if the magic bytes don't match, // so convert that return code to LZMA_DATA_ERROR. lzma_stream_flags footer_flags; const lzma_ret ret = lzma_stream_footer_decode( &footer_flags, coder->buffer); if (ret != LZMA_OK) return ret == LZMA_FORMAT_ERROR ? LZMA_DATA_ERROR : ret; // Check that Index Size stored in the Stream Footer matches // the real size of the Index field. if (lzma_index_hash_size(coder->index_hash) != footer_flags.backward_size) return LZMA_DATA_ERROR; // Compare that the Stream Flags fields are identical in // both Stream Header and Stream Footer. return_if_error(lzma_stream_flags_compare( &coder->stream_flags, &footer_flags)); if (!coder->concatenated) return LZMA_STREAM_END; coder->sequence = SEQ_STREAM_PADDING; } // Fall through case SEQ_STREAM_PADDING: assert(coder->concatenated); // Skip over possible Stream Padding. while (true) { if (*in_pos >= in_size) { // Unless LZMA_FINISH was used, we cannot // know if there's more input coming later. if (action != LZMA_FINISH) return LZMA_OK; // Stream Padding must be a multiple of // four bytes. return coder->pos == 0 ? LZMA_STREAM_END : LZMA_DATA_ERROR; } // If the byte is not zero, it probably indicates // beginning of a new Stream (or the file is corrupt). if (in[*in_pos] != 0x00) break; ++*in_pos; coder->pos = (coder->pos + 1) & 3; } // Stream Padding must be a multiple of four bytes (empty // Stream Padding is OK). if (coder->pos != 0) { ++*in_pos; return LZMA_DATA_ERROR; } // Prepare to decode the next Stream. return_if_error(stream_decoder_reset(coder, allocator)); break; default: assert(0); return LZMA_PROG_ERROR; } // Never reached } static void stream_decoder_end(void *coder_ptr, const lzma_allocator *allocator) { lzma_stream_coder *coder = coder_ptr; lzma_next_end(&coder->block_decoder, allocator); lzma_index_hash_end(coder->index_hash, allocator); lzma_free(coder, allocator); return; } static lzma_check stream_decoder_get_check(const void *coder_ptr) { const lzma_stream_coder *coder = coder_ptr; return coder->stream_flags.check; } static lzma_ret stream_decoder_memconfig(void *coder_ptr, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit) { lzma_stream_coder *coder = coder_ptr; *memusage = coder->memusage; *old_memlimit = coder->memlimit; if (new_memlimit != 0) { if (new_memlimit < coder->memusage) return LZMA_MEMLIMIT_ERROR; coder->memlimit = new_memlimit; } return LZMA_OK; } extern lzma_ret lzma_stream_decoder_init( lzma_next_coder *next, const lzma_allocator *allocator, uint64_t memlimit, uint32_t flags) { lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator); if (flags & ~LZMA_SUPPORTED_FLAGS) return LZMA_OPTIONS_ERROR; lzma_stream_coder *coder = next->coder; if (coder == NULL) { coder = lzma_alloc(sizeof(lzma_stream_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; next->coder = coder; next->code = &stream_decode; next->end = &stream_decoder_end; next->get_check = &stream_decoder_get_check; next->memconfig = &stream_decoder_memconfig; coder->block_decoder = LZMA_NEXT_CODER_INIT; coder->index_hash = NULL; } coder->memlimit = my_max(1, memlimit); coder->memusage = LZMA_MEMUSAGE_BASE; coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0; coder->tell_unsupported_check = (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0; coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0; coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0; coder->concatenated = (flags & LZMA_CONCATENATED) != 0; coder->first_stream = true; return stream_decoder_reset(coder, allocator); } extern LZMA_API(lzma_ret) lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags) { lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } diff --git a/contrib/xz/src/liblzma/common/stream_encoder_mt.c b/contrib/xz/src/liblzma/common/stream_encoder_mt.c index 01e40339750a..819b22733b68 100644 --- a/contrib/xz/src/liblzma/common/stream_encoder_mt.c +++ b/contrib/xz/src/liblzma/common/stream_encoder_mt.c @@ -1,1143 +1,1188 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file stream_encoder_mt.c /// \brief Multithreaded .xz Stream encoder // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "filter_encoder.h" #include "easy_preset.h" #include "block_encoder.h" #include "block_buffer_encoder.h" #include "index_encoder.h" #include "outqueue.h" /// Maximum supported block size. This makes it simpler to prevent integer /// overflows if we are given unusually large block size. #define BLOCK_SIZE_MAX (UINT64_MAX / LZMA_THREADS_MAX) typedef enum { /// Waiting for work. THR_IDLE, /// Encoding is in progress. THR_RUN, /// Encoding is in progress but no more input data will /// be read. THR_FINISH, /// The main thread wants the thread to stop whatever it was doing /// but not exit. THR_STOP, /// The main thread wants the thread to exit. We could use /// cancellation but since there's stopped anyway, this is lazier. THR_EXIT, } worker_state; typedef struct lzma_stream_coder_s lzma_stream_coder; typedef struct worker_thread_s worker_thread; struct worker_thread_s { worker_state state; /// Input buffer of coder->block_size bytes. The main thread will /// put new input into this and update in_size accordingly. Once /// no more input is coming, state will be set to THR_FINISH. uint8_t *in; /// Amount of data available in the input buffer. This is modified /// only by the main thread. size_t in_size; /// Output buffer for this thread. This is set by the main /// thread every time a new Block is started with this thread /// structure. lzma_outbuf *outbuf; /// Pointer to the main structure is needed when putting this /// thread back to the stack of free threads. lzma_stream_coder *coder; /// The allocator is set by the main thread. Since a copy of the /// pointer is kept here, the application must not change the /// allocator before calling lzma_end(). const lzma_allocator *allocator; /// Amount of uncompressed data that has already been compressed. uint64_t progress_in; /// Amount of compressed data that is ready. uint64_t progress_out; /// Block encoder lzma_next_coder block_encoder; /// Compression options for this Block lzma_block block_options; /// Next structure in the stack of free worker threads. worker_thread *next; mythread_mutex mutex; mythread_cond cond; /// The ID of this thread is used to join the thread /// when it's not needed anymore. mythread thread_id; }; struct lzma_stream_coder_s { enum { SEQ_STREAM_HEADER, SEQ_BLOCK, SEQ_INDEX, SEQ_STREAM_FOOTER, } sequence; /// Start a new Block every block_size bytes of input unless /// LZMA_FULL_FLUSH or LZMA_FULL_BARRIER is used earlier. size_t block_size; /// The filter chain currently in use lzma_filter filters[LZMA_FILTERS_MAX + 1]; /// Index to hold sizes of the Blocks lzma_index *index; /// Index encoder lzma_next_coder index_encoder; /// Stream Flags for encoding the Stream Header and Stream Footer. lzma_stream_flags stream_flags; /// Buffer to hold Stream Header and Stream Footer. uint8_t header[LZMA_STREAM_HEADER_SIZE]; /// Read position in header[] size_t header_pos; /// Output buffer queue for compressed data lzma_outq outq; /// Maximum wait time if cannot use all the input and cannot /// fill the output buffer. This is in milliseconds. uint32_t timeout; /// Error code from a worker thread lzma_ret thread_error; /// Array of allocated thread-specific structures worker_thread *threads; /// Number of structures in "threads" above. This is also the /// number of threads that will be created at maximum. uint32_t threads_max; /// Number of thread structures that have been initialized, and /// thus the number of worker threads actually created so far. uint32_t threads_initialized; /// Stack of free threads. When a thread finishes, it puts itself /// back into this stack. This starts as empty because threads /// are created only when actually needed. worker_thread *threads_free; /// The most recent worker thread to which the main thread writes /// the new input from the application. worker_thread *thr; /// Amount of uncompressed data in Blocks that have already /// been finished. uint64_t progress_in; /// Amount of compressed data in Stream Header + Blocks that /// have already been finished. uint64_t progress_out; mythread_mutex mutex; mythread_cond cond; }; /// Tell the main thread that something has gone wrong. static void worker_error(worker_thread *thr, lzma_ret ret) { assert(ret != LZMA_OK); assert(ret != LZMA_STREAM_END); mythread_sync(thr->coder->mutex) { if (thr->coder->thread_error == LZMA_OK) thr->coder->thread_error = ret; mythread_cond_signal(&thr->coder->cond); } return; } static worker_state worker_encode(worker_thread *thr, worker_state state) { assert(thr->progress_in == 0); assert(thr->progress_out == 0); // Set the Block options. thr->block_options = (lzma_block){ .version = 0, .check = thr->coder->stream_flags.check, .compressed_size = thr->coder->outq.buf_size_max, .uncompressed_size = thr->coder->block_size, // TODO: To allow changing the filter chain, the filters // array must be copied to each worker_thread. .filters = thr->coder->filters, }; // Calculate maximum size of the Block Header. This amount is // reserved in the beginning of the buffer so that Block Header // along with Compressed Size and Uncompressed Size can be // written there. lzma_ret ret = lzma_block_header_size(&thr->block_options); if (ret != LZMA_OK) { worker_error(thr, ret); return THR_STOP; } // Initialize the Block encoder. ret = lzma_block_encoder_init(&thr->block_encoder, thr->allocator, &thr->block_options); if (ret != LZMA_OK) { worker_error(thr, ret); return THR_STOP; } size_t in_pos = 0; size_t in_size = 0; thr->outbuf->size = thr->block_options.header_size; const size_t out_size = thr->coder->outq.buf_size_max; do { mythread_sync(thr->mutex) { // Store in_pos and out_pos into *thr so that // an application may read them via // lzma_get_progress() to get progress information. // // NOTE: These aren't updated when the encoding // finishes. Instead, the final values are taken // later from thr->outbuf. thr->progress_in = in_pos; thr->progress_out = thr->outbuf->size; while (in_size == thr->in_size && thr->state == THR_RUN) mythread_cond_wait(&thr->cond, &thr->mutex); state = thr->state; in_size = thr->in_size; } // Return if we were asked to stop or exit. if (state >= THR_STOP) return state; lzma_action action = state == THR_FINISH ? LZMA_FINISH : LZMA_RUN; // Limit the amount of input given to the Block encoder // at once. This way this thread can react fairly quickly // if the main thread wants us to stop or exit. static const size_t in_chunk_max = 16384; size_t in_limit = in_size; if (in_size - in_pos > in_chunk_max) { in_limit = in_pos + in_chunk_max; action = LZMA_RUN; } ret = thr->block_encoder.code( thr->block_encoder.coder, thr->allocator, thr->in, &in_pos, in_limit, thr->outbuf->buf, &thr->outbuf->size, out_size, action); } while (ret == LZMA_OK && thr->outbuf->size < out_size); switch (ret) { case LZMA_STREAM_END: assert(state == THR_FINISH); // Encode the Block Header. By doing it after // the compression, we can store the Compressed Size // and Uncompressed Size fields. ret = lzma_block_header_encode(&thr->block_options, thr->outbuf->buf); if (ret != LZMA_OK) { worker_error(thr, ret); return THR_STOP; } break; case LZMA_OK: // The data was incompressible. Encode it using uncompressed // LZMA2 chunks. // // First wait that we have gotten all the input. mythread_sync(thr->mutex) { while (thr->state == THR_RUN) mythread_cond_wait(&thr->cond, &thr->mutex); state = thr->state; in_size = thr->in_size; } if (state >= THR_STOP) return state; // Do the encoding. This takes care of the Block Header too. thr->outbuf->size = 0; ret = lzma_block_uncomp_encode(&thr->block_options, thr->in, in_size, thr->outbuf->buf, &thr->outbuf->size, out_size); // It shouldn't fail. if (ret != LZMA_OK) { worker_error(thr, LZMA_PROG_ERROR); return THR_STOP; } break; default: worker_error(thr, ret); return THR_STOP; } // Set the size information that will be read by the main thread // to write the Index field. thr->outbuf->unpadded_size = lzma_block_unpadded_size(&thr->block_options); assert(thr->outbuf->unpadded_size != 0); thr->outbuf->uncompressed_size = thr->block_options.uncompressed_size; return THR_FINISH; } static MYTHREAD_RET_TYPE worker_start(void *thr_ptr) { worker_thread *thr = thr_ptr; worker_state state = THR_IDLE; // Init to silence a warning while (true) { // Wait for work. mythread_sync(thr->mutex) { while (true) { // The thread is already idle so if we are // requested to stop, just set the state. if (thr->state == THR_STOP) { thr->state = THR_IDLE; mythread_cond_signal(&thr->cond); } state = thr->state; if (state != THR_IDLE) break; mythread_cond_wait(&thr->cond, &thr->mutex); } } assert(state != THR_IDLE); assert(state != THR_STOP); if (state <= THR_FINISH) state = worker_encode(thr, state); if (state == THR_EXIT) break; // Mark the thread as idle unless the main thread has // told us to exit. Signal is needed for the case // where the main thread is waiting for the threads to stop. mythread_sync(thr->mutex) { if (thr->state != THR_EXIT) { thr->state = THR_IDLE; mythread_cond_signal(&thr->cond); } } mythread_sync(thr->coder->mutex) { // Mark the output buffer as finished if // no errors occurred. thr->outbuf->finished = state == THR_FINISH; // Update the main progress info. thr->coder->progress_in += thr->outbuf->uncompressed_size; thr->coder->progress_out += thr->outbuf->size; thr->progress_in = 0; thr->progress_out = 0; // Return this thread to the stack of free threads. thr->next = thr->coder->threads_free; thr->coder->threads_free = thr; mythread_cond_signal(&thr->coder->cond); } } // Exiting, free the resources. mythread_mutex_destroy(&thr->mutex); mythread_cond_destroy(&thr->cond); lzma_next_end(&thr->block_encoder, thr->allocator); lzma_free(thr->in, thr->allocator); return MYTHREAD_RET_VALUE; } /// Make the threads stop but not exit. Optionally wait for them to stop. static void threads_stop(lzma_stream_coder *coder, bool wait_for_threads) { // Tell the threads to stop. for (uint32_t i = 0; i < coder->threads_initialized; ++i) { mythread_sync(coder->threads[i].mutex) { coder->threads[i].state = THR_STOP; mythread_cond_signal(&coder->threads[i].cond); } } if (!wait_for_threads) return; // Wait for the threads to settle in the idle state. for (uint32_t i = 0; i < coder->threads_initialized; ++i) { mythread_sync(coder->threads[i].mutex) { while (coder->threads[i].state != THR_IDLE) mythread_cond_wait(&coder->threads[i].cond, &coder->threads[i].mutex); } } return; } /// Stop the threads and free the resources associated with them. /// Wait until the threads have exited. static void threads_end(lzma_stream_coder *coder, const lzma_allocator *allocator) { for (uint32_t i = 0; i < coder->threads_initialized; ++i) { mythread_sync(coder->threads[i].mutex) { coder->threads[i].state = THR_EXIT; mythread_cond_signal(&coder->threads[i].cond); } } for (uint32_t i = 0; i < coder->threads_initialized; ++i) { int ret = mythread_join(coder->threads[i].thread_id); assert(ret == 0); (void)ret; } lzma_free(coder->threads, allocator); return; } /// Initialize a new worker_thread structure and create a new thread. static lzma_ret initialize_new_thread(lzma_stream_coder *coder, const lzma_allocator *allocator) { worker_thread *thr = &coder->threads[coder->threads_initialized]; thr->in = lzma_alloc(coder->block_size, allocator); if (thr->in == NULL) return LZMA_MEM_ERROR; if (mythread_mutex_init(&thr->mutex)) goto error_mutex; if (mythread_cond_init(&thr->cond)) goto error_cond; thr->state = THR_IDLE; thr->allocator = allocator; thr->coder = coder; thr->progress_in = 0; thr->progress_out = 0; thr->block_encoder = LZMA_NEXT_CODER_INIT; if (mythread_create(&thr->thread_id, &worker_start, thr)) goto error_thread; ++coder->threads_initialized; coder->thr = thr; return LZMA_OK; error_thread: mythread_cond_destroy(&thr->cond); error_cond: mythread_mutex_destroy(&thr->mutex); error_mutex: lzma_free(thr->in, allocator); return LZMA_MEM_ERROR; } static lzma_ret get_thread(lzma_stream_coder *coder, const lzma_allocator *allocator) { // If there are no free output subqueues, there is no // point to try getting a thread. if (!lzma_outq_has_buf(&coder->outq)) return LZMA_OK; // If there is a free structure on the stack, use it. mythread_sync(coder->mutex) { if (coder->threads_free != NULL) { coder->thr = coder->threads_free; coder->threads_free = coder->threads_free->next; } } if (coder->thr == NULL) { // If there are no uninitialized structures left, return. if (coder->threads_initialized == coder->threads_max) return LZMA_OK; // Initialize a new thread. return_if_error(initialize_new_thread(coder, allocator)); } // Reset the parts of the thread state that have to be done // in the main thread. mythread_sync(coder->thr->mutex) { coder->thr->state = THR_RUN; coder->thr->in_size = 0; coder->thr->outbuf = lzma_outq_get_buf(&coder->outq); mythread_cond_signal(&coder->thr->cond); } return LZMA_OK; } static lzma_ret stream_encode_in(lzma_stream_coder *coder, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, lzma_action action) { while (*in_pos < in_size || (coder->thr != NULL && action != LZMA_RUN)) { if (coder->thr == NULL) { // Get a new thread. const lzma_ret ret = get_thread(coder, allocator); if (coder->thr == NULL) return ret; } // Copy the input data to thread's buffer. size_t thr_in_size = coder->thr->in_size; lzma_bufcpy(in, in_pos, in_size, coder->thr->in, &thr_in_size, coder->block_size); // Tell the Block encoder to finish if // - it has got block_size bytes of input; or // - all input was used and LZMA_FINISH, LZMA_FULL_FLUSH, // or LZMA_FULL_BARRIER was used. // // TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER. const bool finish = thr_in_size == coder->block_size || (*in_pos == in_size && action != LZMA_RUN); bool block_error = false; mythread_sync(coder->thr->mutex) { if (coder->thr->state == THR_IDLE) { // Something has gone wrong with the Block // encoder. It has set coder->thread_error // which we will read a few lines later. block_error = true; } else { // Tell the Block encoder its new amount // of input and update the state if needed. coder->thr->in_size = thr_in_size; if (finish) coder->thr->state = THR_FINISH; mythread_cond_signal(&coder->thr->cond); } } if (block_error) { lzma_ret ret; mythread_sync(coder->mutex) { ret = coder->thread_error; } return ret; } if (finish) coder->thr = NULL; } return LZMA_OK; } /// Wait until more input can be consumed, more output can be read, or /// an optional timeout is reached. static bool wait_for_work(lzma_stream_coder *coder, mythread_condtime *wait_abs, bool *has_blocked, bool has_input) { if (coder->timeout != 0 && !*has_blocked) { // Every time when stream_encode_mt() is called via // lzma_code(), *has_blocked starts as false. We set it // to true here and calculate the absolute time when // we must return if there's nothing to do. // // The idea of *has_blocked is to avoid unneeded calls // to mythread_condtime_set(), which may do a syscall // depending on the operating system. *has_blocked = true; mythread_condtime_set(wait_abs, &coder->cond, coder->timeout); } bool timed_out = false; mythread_sync(coder->mutex) { // There are four things that we wait. If one of them // becomes possible, we return. // - If there is input left, we need to get a free // worker thread and an output buffer for it. // - Data ready to be read from the output queue. // - A worker thread indicates an error. // - Time out occurs. while ((!has_input || coder->threads_free == NULL || !lzma_outq_has_buf(&coder->outq)) && !lzma_outq_is_readable(&coder->outq) && coder->thread_error == LZMA_OK && !timed_out) { if (coder->timeout != 0) timed_out = mythread_cond_timedwait( &coder->cond, &coder->mutex, wait_abs) != 0; else mythread_cond_wait(&coder->cond, &coder->mutex); } } return timed_out; } static lzma_ret stream_encode_mt(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { lzma_stream_coder *coder = coder_ptr; switch (coder->sequence) { case SEQ_STREAM_HEADER: lzma_bufcpy(coder->header, &coder->header_pos, sizeof(coder->header), out, out_pos, out_size); if (coder->header_pos < sizeof(coder->header)) return LZMA_OK; coder->header_pos = 0; coder->sequence = SEQ_BLOCK; // Fall through case SEQ_BLOCK: { // Initialized to silence warnings. lzma_vli unpadded_size = 0; lzma_vli uncompressed_size = 0; lzma_ret ret = LZMA_OK; // These are for wait_for_work(). bool has_blocked = false; mythread_condtime wait_abs; while (true) { mythread_sync(coder->mutex) { // Check for Block encoder errors. ret = coder->thread_error; if (ret != LZMA_OK) { assert(ret != LZMA_STREAM_END); break; // Break out of mythread_sync. } // Try to read compressed data to out[]. ret = lzma_outq_read(&coder->outq, out, out_pos, out_size, &unpadded_size, &uncompressed_size); } if (ret == LZMA_STREAM_END) { // End of Block. Add it to the Index. ret = lzma_index_append(coder->index, allocator, unpadded_size, uncompressed_size); + if (ret != LZMA_OK) { + threads_stop(coder, false); + return ret; + } // If we didn't fill the output buffer yet, // try to read more data. Maybe the next // outbuf has been finished already too. if (*out_pos < out_size) continue; } if (ret != LZMA_OK) { - // coder->thread_error was set or - // lzma_index_append() failed. + // coder->thread_error was set. threads_stop(coder, false); return ret; } // Try to give uncompressed data to a worker thread. ret = stream_encode_in(coder, allocator, in, in_pos, in_size, action); if (ret != LZMA_OK) { threads_stop(coder, false); return ret; } // See if we should wait or return. // // TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER. if (*in_pos == in_size) { // LZMA_RUN: More data is probably coming // so return to let the caller fill the // input buffer. if (action == LZMA_RUN) return LZMA_OK; // LZMA_FULL_BARRIER: The same as with // LZMA_RUN but tell the caller that the // barrier was completed. if (action == LZMA_FULL_BARRIER) return LZMA_STREAM_END; // Finishing or flushing isn't completed until // all input data has been encoded and copied // to the output buffer. if (lzma_outq_is_empty(&coder->outq)) { // LZMA_FINISH: Continue to encode // the Index field. if (action == LZMA_FINISH) break; // LZMA_FULL_FLUSH: Return to tell // the caller that flushing was // completed. if (action == LZMA_FULL_FLUSH) return LZMA_STREAM_END; } } // Return if there is no output space left. // This check must be done after testing the input // buffer, because we might want to use a different // return code. if (*out_pos == out_size) return LZMA_OK; // Neither in nor out has been used completely. // Wait until there's something we can do. if (wait_for_work(coder, &wait_abs, &has_blocked, *in_pos < in_size)) return LZMA_TIMED_OUT; } // All Blocks have been encoded and the threads have stopped. // Prepare to encode the Index field. return_if_error(lzma_index_encoder_init( &coder->index_encoder, allocator, coder->index)); coder->sequence = SEQ_INDEX; // Update the progress info to take the Index and // Stream Footer into account. Those are very fast to encode // so in terms of progress information they can be thought // to be ready to be copied out. coder->progress_out += lzma_index_size(coder->index) + LZMA_STREAM_HEADER_SIZE; } // Fall through case SEQ_INDEX: { // Call the Index encoder. It doesn't take any input, so // those pointers can be NULL. const lzma_ret ret = coder->index_encoder.code( coder->index_encoder.coder, allocator, NULL, NULL, 0, out, out_pos, out_size, LZMA_RUN); if (ret != LZMA_STREAM_END) return ret; // Encode the Stream Footer into coder->buffer. coder->stream_flags.backward_size = lzma_index_size(coder->index); if (lzma_stream_footer_encode(&coder->stream_flags, coder->header) != LZMA_OK) return LZMA_PROG_ERROR; coder->sequence = SEQ_STREAM_FOOTER; } // Fall through case SEQ_STREAM_FOOTER: lzma_bufcpy(coder->header, &coder->header_pos, sizeof(coder->header), out, out_pos, out_size); return coder->header_pos < sizeof(coder->header) ? LZMA_OK : LZMA_STREAM_END; } assert(0); return LZMA_PROG_ERROR; } static void stream_encoder_mt_end(void *coder_ptr, const lzma_allocator *allocator) { lzma_stream_coder *coder = coder_ptr; // Threads must be killed before the output queue can be freed. threads_end(coder, allocator); lzma_outq_end(&coder->outq, allocator); for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i) lzma_free(coder->filters[i].options, allocator); lzma_next_end(&coder->index_encoder, allocator); lzma_index_end(coder->index, allocator); mythread_cond_destroy(&coder->cond); mythread_mutex_destroy(&coder->mutex); lzma_free(coder, allocator); return; } /// Options handling for lzma_stream_encoder_mt_init() and /// lzma_stream_encoder_mt_memusage() static lzma_ret get_options(const lzma_mt *options, lzma_options_easy *opt_easy, const lzma_filter **filters, uint64_t *block_size, uint64_t *outbuf_size_max) { // Validate some of the options. if (options == NULL) return LZMA_PROG_ERROR; if (options->flags != 0 || options->threads == 0 || options->threads > LZMA_THREADS_MAX) return LZMA_OPTIONS_ERROR; if (options->filters != NULL) { // Filter chain was given, use it as is. *filters = options->filters; } else { // Use a preset. if (lzma_easy_preset(opt_easy, options->preset)) return LZMA_OPTIONS_ERROR; *filters = opt_easy->filters; } // Block size if (options->block_size > 0) { if (options->block_size > BLOCK_SIZE_MAX) return LZMA_OPTIONS_ERROR; *block_size = options->block_size; } else { // Determine the Block size from the filter chain. *block_size = lzma_mt_block_size(*filters); if (*block_size == 0) return LZMA_OPTIONS_ERROR; assert(*block_size <= BLOCK_SIZE_MAX); } // Calculate the maximum amount output that a single output buffer // may need to hold. This is the same as the maximum total size of // a Block. *outbuf_size_max = lzma_block_buffer_bound64(*block_size); if (*outbuf_size_max == 0) return LZMA_MEM_ERROR; return LZMA_OK; } static void get_progress(void *coder_ptr, uint64_t *progress_in, uint64_t *progress_out) { lzma_stream_coder *coder = coder_ptr; // Lock coder->mutex to prevent finishing threads from moving their // progress info from the worker_thread structure to lzma_stream_coder. mythread_sync(coder->mutex) { *progress_in = coder->progress_in; *progress_out = coder->progress_out; for (size_t i = 0; i < coder->threads_initialized; ++i) { mythread_sync(coder->threads[i].mutex) { *progress_in += coder->threads[i].progress_in; *progress_out += coder->threads[i] .progress_out; } } } return; } static lzma_ret stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_mt *options) { lzma_next_coder_init(&stream_encoder_mt_init, next, allocator); // Get the filter chain. lzma_options_easy easy; const lzma_filter *filters; uint64_t block_size; uint64_t outbuf_size_max; return_if_error(get_options(options, &easy, &filters, &block_size, &outbuf_size_max)); #if SIZE_MAX < UINT64_MAX if (block_size > SIZE_MAX) return LZMA_MEM_ERROR; #endif // Validate the filter chain so that we can give an error in this // function instead of delaying it to the first call to lzma_code(). // The memory usage calculation verifies the filter chain as // a side effect so we take advantage of that. if (lzma_raw_encoder_memusage(filters) == UINT64_MAX) return LZMA_OPTIONS_ERROR; // Validate the Check ID. if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX) return LZMA_PROG_ERROR; if (!lzma_check_is_supported(options->check)) return LZMA_UNSUPPORTED_CHECK; // Allocate and initialize the base structure if needed. lzma_stream_coder *coder = next->coder; if (coder == NULL) { coder = lzma_alloc(sizeof(lzma_stream_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; next->coder = coder; // For the mutex and condition variable initializations // the error handling has to be done here because // stream_encoder_mt_end() doesn't know if they have // already been initialized or not. if (mythread_mutex_init(&coder->mutex)) { lzma_free(coder, allocator); next->coder = NULL; return LZMA_MEM_ERROR; } if (mythread_cond_init(&coder->cond)) { mythread_mutex_destroy(&coder->mutex); lzma_free(coder, allocator); next->coder = NULL; return LZMA_MEM_ERROR; } next->code = &stream_encode_mt; next->end = &stream_encoder_mt_end; next->get_progress = &get_progress; // next->update = &stream_encoder_mt_update; coder->filters[0].id = LZMA_VLI_UNKNOWN; coder->index_encoder = LZMA_NEXT_CODER_INIT; coder->index = NULL; memzero(&coder->outq, sizeof(coder->outq)); coder->threads = NULL; coder->threads_max = 0; coder->threads_initialized = 0; } // Basic initializations coder->sequence = SEQ_STREAM_HEADER; coder->block_size = (size_t)(block_size); coder->thread_error = LZMA_OK; coder->thr = NULL; // Allocate the thread-specific base structures. assert(options->threads > 0); if (coder->threads_max != options->threads) { threads_end(coder, allocator); coder->threads = NULL; coder->threads_max = 0; coder->threads_initialized = 0; coder->threads_free = NULL; coder->threads = lzma_alloc( options->threads * sizeof(worker_thread), allocator); if (coder->threads == NULL) return LZMA_MEM_ERROR; coder->threads_max = options->threads; } else { // Reuse the old structures and threads. Tell the running // threads to stop and wait until they have stopped. threads_stop(coder, true); } // Output queue return_if_error(lzma_outq_init(&coder->outq, allocator, outbuf_size_max, options->threads)); // Timeout coder->timeout = options->timeout; // Free the old filter chain and copy the new one. for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i) lzma_free(coder->filters[i].options, allocator); return_if_error(lzma_filters_copy( filters, coder->filters, allocator)); // Index lzma_index_end(coder->index, allocator); coder->index = lzma_index_init(allocator); if (coder->index == NULL) return LZMA_MEM_ERROR; // Stream Header coder->stream_flags.version = 0; coder->stream_flags.check = options->check; return_if_error(lzma_stream_header_encode( &coder->stream_flags, coder->header)); coder->header_pos = 0; // Progress info coder->progress_in = 0; coder->progress_out = LZMA_STREAM_HEADER_SIZE; return LZMA_OK; } +#ifdef HAVE_SYMBOL_VERSIONS_LINUX +// These are for compatibility with binaries linked against liblzma that +// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7. +// Actually that patch didn't create lzma_stream_encoder_mt@XZ_5.2.2 +// but it has been added here anyway since someone might misread the +// RHEL patch and think both @XZ_5.1.2alpha and @XZ_5.2.2 exist. +LZMA_SYMVER_API("lzma_stream_encoder_mt@XZ_5.1.2alpha", + lzma_ret, lzma_stream_encoder_mt_512a)( + lzma_stream *strm, const lzma_mt *options) + lzma_nothrow lzma_attr_warn_unused_result + __attribute__((__alias__("lzma_stream_encoder_mt_52"))); + +LZMA_SYMVER_API("lzma_stream_encoder_mt@XZ_5.2.2", + lzma_ret, lzma_stream_encoder_mt_522)( + lzma_stream *strm, const lzma_mt *options) + lzma_nothrow lzma_attr_warn_unused_result + __attribute__((__alias__("lzma_stream_encoder_mt_52"))); + +LZMA_SYMVER_API("lzma_stream_encoder_mt@@XZ_5.2", + lzma_ret, lzma_stream_encoder_mt_52)( + lzma_stream *strm, const lzma_mt *options) + lzma_nothrow lzma_attr_warn_unused_result; + +#define lzma_stream_encoder_mt lzma_stream_encoder_mt_52 +#endif extern LZMA_API(lzma_ret) lzma_stream_encoder_mt(lzma_stream *strm, const lzma_mt *options) { lzma_next_strm_init(stream_encoder_mt_init, strm, options); strm->internal->supported_actions[LZMA_RUN] = true; // strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; strm->internal->supported_actions[LZMA_FULL_BARRIER] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } +#ifdef HAVE_SYMBOL_VERSIONS_LINUX +LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@XZ_5.1.2alpha", + uint64_t, lzma_stream_encoder_mt_memusage_512a)( + const lzma_mt *options) lzma_nothrow lzma_attr_pure + __attribute__((__alias__("lzma_stream_encoder_mt_memusage_52"))); + +LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@XZ_5.2.2", + uint64_t, lzma_stream_encoder_mt_memusage_522)( + const lzma_mt *options) lzma_nothrow lzma_attr_pure + __attribute__((__alias__("lzma_stream_encoder_mt_memusage_52"))); + +LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@@XZ_5.2", + uint64_t, lzma_stream_encoder_mt_memusage_52)( + const lzma_mt *options) lzma_nothrow lzma_attr_pure; + +#define lzma_stream_encoder_mt_memusage lzma_stream_encoder_mt_memusage_52 +#endif // This function name is a monster but it's consistent with the older // monster names. :-( 31 chars is the max that C99 requires so in that // sense it's not too long. ;-) extern LZMA_API(uint64_t) lzma_stream_encoder_mt_memusage(const lzma_mt *options) { lzma_options_easy easy; const lzma_filter *filters; uint64_t block_size; uint64_t outbuf_size_max; if (get_options(options, &easy, &filters, &block_size, &outbuf_size_max) != LZMA_OK) return UINT64_MAX; // Memory usage of the input buffers const uint64_t inbuf_memusage = options->threads * block_size; // Memory usage of the filter encoders uint64_t filters_memusage = lzma_raw_encoder_memusage(filters); if (filters_memusage == UINT64_MAX) return UINT64_MAX; filters_memusage *= options->threads; // Memory usage of the output queue const uint64_t outq_memusage = lzma_outq_memusage( outbuf_size_max, options->threads); if (outq_memusage == UINT64_MAX) return UINT64_MAX; // Sum them with overflow checking. uint64_t total_memusage = LZMA_MEMUSAGE_BASE + sizeof(lzma_stream_coder) + options->threads * sizeof(worker_thread); if (UINT64_MAX - total_memusage < inbuf_memusage) return UINT64_MAX; total_memusage += inbuf_memusage; if (UINT64_MAX - total_memusage < filters_memusage) return UINT64_MAX; total_memusage += filters_memusage; if (UINT64_MAX - total_memusage < outq_memusage) return UINT64_MAX; return total_memusage + outq_memusage; } diff --git a/contrib/xz/src/liblzma/liblzma.map b/contrib/xz/src/liblzma/liblzma_generic.map similarity index 100% copy from contrib/xz/src/liblzma/liblzma.map copy to contrib/xz/src/liblzma/liblzma_generic.map index f53a4ea30a3c..8cca05bdeead 100644 --- a/contrib/xz/src/liblzma/liblzma.map +++ b/contrib/xz/src/liblzma/liblzma_generic.map @@ -1,108 +1,108 @@ XZ_5.0 { global: lzma_alone_decoder; lzma_alone_encoder; lzma_auto_decoder; lzma_block_buffer_bound; lzma_block_buffer_decode; lzma_block_buffer_encode; lzma_block_compressed_size; lzma_block_decoder; lzma_block_encoder; lzma_block_header_decode; lzma_block_header_encode; lzma_block_header_size; lzma_block_total_size; lzma_block_unpadded_size; lzma_check_is_supported; lzma_check_size; lzma_code; lzma_crc32; lzma_crc64; lzma_easy_buffer_encode; lzma_easy_decoder_memusage; lzma_easy_encoder; lzma_easy_encoder_memusage; lzma_end; lzma_filter_decoder_is_supported; lzma_filter_encoder_is_supported; lzma_filter_flags_decode; lzma_filter_flags_encode; lzma_filter_flags_size; lzma_filters_copy; lzma_filters_update; lzma_get_check; lzma_index_append; lzma_index_block_count; lzma_index_buffer_decode; lzma_index_buffer_encode; lzma_index_cat; lzma_index_checks; lzma_index_decoder; lzma_index_dup; lzma_index_encoder; lzma_index_end; lzma_index_file_size; lzma_index_hash_append; lzma_index_hash_decode; lzma_index_hash_end; lzma_index_hash_init; lzma_index_hash_size; lzma_index_init; lzma_index_iter_init; lzma_index_iter_locate; lzma_index_iter_next; lzma_index_iter_rewind; lzma_index_memusage; lzma_index_memused; lzma_index_size; lzma_index_stream_count; lzma_index_stream_flags; lzma_index_stream_padding; lzma_index_stream_size; lzma_index_total_size; lzma_index_uncompressed_size; lzma_lzma_preset; lzma_memlimit_get; lzma_memlimit_set; lzma_memusage; lzma_mf_is_supported; lzma_mode_is_supported; lzma_physmem; lzma_properties_decode; lzma_properties_encode; lzma_properties_size; lzma_raw_buffer_decode; lzma_raw_buffer_encode; lzma_raw_decoder; lzma_raw_decoder_memusage; lzma_raw_encoder; lzma_raw_encoder_memusage; lzma_stream_buffer_bound; lzma_stream_buffer_decode; lzma_stream_buffer_encode; lzma_stream_decoder; lzma_stream_encoder; lzma_stream_flags_compare; lzma_stream_footer_decode; lzma_stream_footer_encode; lzma_stream_header_decode; lzma_stream_header_encode; lzma_version_number; lzma_version_string; lzma_vli_decode; lzma_vli_encode; lzma_vli_size; + +local: + *; }; XZ_5.2 { global: lzma_block_uncomp_encode; lzma_cputhreads; lzma_get_progress; lzma_stream_encoder_mt; lzma_stream_encoder_mt_memusage; - -local: - *; } XZ_5.0; diff --git a/contrib/xz/src/liblzma/liblzma.map b/contrib/xz/src/liblzma/liblzma_linux.map similarity index 90% rename from contrib/xz/src/liblzma/liblzma.map rename to contrib/xz/src/liblzma/liblzma_linux.map index f53a4ea30a3c..4be882cc2914 100644 --- a/contrib/xz/src/liblzma/liblzma.map +++ b/contrib/xz/src/liblzma/liblzma_linux.map @@ -1,108 +1,123 @@ XZ_5.0 { global: lzma_alone_decoder; lzma_alone_encoder; lzma_auto_decoder; lzma_block_buffer_bound; lzma_block_buffer_decode; lzma_block_buffer_encode; lzma_block_compressed_size; lzma_block_decoder; lzma_block_encoder; lzma_block_header_decode; lzma_block_header_encode; lzma_block_header_size; lzma_block_total_size; lzma_block_unpadded_size; lzma_check_is_supported; lzma_check_size; lzma_code; lzma_crc32; lzma_crc64; lzma_easy_buffer_encode; lzma_easy_decoder_memusage; lzma_easy_encoder; lzma_easy_encoder_memusage; lzma_end; lzma_filter_decoder_is_supported; lzma_filter_encoder_is_supported; lzma_filter_flags_decode; lzma_filter_flags_encode; lzma_filter_flags_size; lzma_filters_copy; lzma_filters_update; lzma_get_check; lzma_index_append; lzma_index_block_count; lzma_index_buffer_decode; lzma_index_buffer_encode; lzma_index_cat; lzma_index_checks; lzma_index_decoder; lzma_index_dup; lzma_index_encoder; lzma_index_end; lzma_index_file_size; lzma_index_hash_append; lzma_index_hash_decode; lzma_index_hash_end; lzma_index_hash_init; lzma_index_hash_size; lzma_index_init; lzma_index_iter_init; lzma_index_iter_locate; lzma_index_iter_next; lzma_index_iter_rewind; lzma_index_memusage; lzma_index_memused; lzma_index_size; lzma_index_stream_count; lzma_index_stream_flags; lzma_index_stream_padding; lzma_index_stream_size; lzma_index_total_size; lzma_index_uncompressed_size; lzma_lzma_preset; lzma_memlimit_get; lzma_memlimit_set; lzma_memusage; lzma_mf_is_supported; lzma_mode_is_supported; lzma_physmem; lzma_properties_decode; lzma_properties_encode; lzma_properties_size; lzma_raw_buffer_decode; lzma_raw_buffer_encode; lzma_raw_decoder; lzma_raw_decoder_memusage; lzma_raw_encoder; lzma_raw_encoder_memusage; lzma_stream_buffer_bound; lzma_stream_buffer_decode; lzma_stream_buffer_encode; lzma_stream_decoder; lzma_stream_encoder; lzma_stream_flags_compare; lzma_stream_footer_decode; lzma_stream_footer_encode; lzma_stream_header_decode; lzma_stream_header_encode; lzma_version_number; lzma_version_string; lzma_vli_decode; lzma_vli_encode; lzma_vli_size; + +local: + *; }; XZ_5.2 { global: lzma_block_uncomp_encode; lzma_cputhreads; lzma_get_progress; lzma_stream_encoder_mt; lzma_stream_encoder_mt_memusage; +} XZ_5.0; -local: - *; +XZ_5.1.2alpha { +global: + lzma_stream_encoder_mt; + lzma_stream_encoder_mt_memusage; } XZ_5.0; + +XZ_5.2.2 { +global: + lzma_block_uncomp_encode; + lzma_cputhreads; + lzma_get_progress; + lzma_stream_encoder_mt; + lzma_stream_encoder_mt_memusage; +} XZ_5.1.2alpha; diff --git a/contrib/xz/src/liblzma/lz/lz_decoder.c b/contrib/xz/src/liblzma/lz/lz_decoder.c index 09b574388f59..ab6af0dd99b3 100644 --- a/contrib/xz/src/liblzma/lz/lz_decoder.c +++ b/contrib/xz/src/liblzma/lz/lz_decoder.c @@ -1,311 +1,317 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file lz_decoder.c /// \brief LZ out window /// // Authors: Igor Pavlov // Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// // liblzma supports multiple LZ77-based filters. The LZ part is shared // between these filters. The LZ code takes care of dictionary handling // and passing the data between filters in the chain. The filter-specific // part decodes from the input buffer to the dictionary. #include "lz_decoder.h" typedef struct { /// Dictionary (history buffer) lzma_dict dict; /// The actual LZ-based decoder e.g. LZMA lzma_lz_decoder lz; /// Next filter in the chain, if any. Note that LZMA and LZMA2 are /// only allowed as the last filter, but the long-range filter in /// future can be in the middle of the chain. lzma_next_coder next; /// True if the next filter in the chain has returned LZMA_STREAM_END. bool next_finished; /// True if the LZ decoder (e.g. LZMA) has detected end of payload /// marker. This may become true before next_finished becomes true. bool this_finished; /// Temporary buffer needed when the LZ-based filter is not the last /// filter in the chain. The output of the next filter is first /// decoded into buffer[], which is then used as input for the actual /// LZ-based decoder. struct { size_t pos; size_t size; uint8_t buffer[LZMA_BUFFER_SIZE]; } temp; } lzma_coder; static void lz_decoder_reset(lzma_coder *coder) { coder->dict.pos = 0; coder->dict.full = 0; coder->dict.buf[coder->dict.size - 1] = '\0'; coder->dict.need_reset = false; return; } static lzma_ret decode_buffer(lzma_coder *coder, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size) { while (true) { // Wrap the dictionary if needed. if (coder->dict.pos == coder->dict.size) coder->dict.pos = 0; // Store the current dictionary position. It is needed to know // where to start copying to the out[] buffer. const size_t dict_start = coder->dict.pos; // Calculate how much we allow coder->lz.code() to decode. // It must not decode past the end of the dictionary // buffer, and we don't want it to decode more than is // actually needed to fill the out[] buffer. coder->dict.limit = coder->dict.pos + my_min(out_size - *out_pos, coder->dict.size - coder->dict.pos); // Call the coder->lz.code() to do the actual decoding. const lzma_ret ret = coder->lz.code( coder->lz.coder, &coder->dict, in, in_pos, in_size); // Copy the decoded data from the dictionary to the out[] // buffer. Do it conditionally because out can be NULL // (in which case copy_size is always 0). Calling memcpy() // with a null-pointer is undefined even if the third // argument is 0. const size_t copy_size = coder->dict.pos - dict_start; assert(copy_size <= out_size - *out_pos); if (copy_size > 0) memcpy(out + *out_pos, coder->dict.buf + dict_start, copy_size); *out_pos += copy_size; // Reset the dictionary if so requested by coder->lz.code(). if (coder->dict.need_reset) { lz_decoder_reset(coder); // Since we reset dictionary, we don't check if // dictionary became full. if (ret != LZMA_OK || *out_pos == out_size) return ret; } else { // Return if everything got decoded or an error // occurred, or if there's no more data to decode. // // Note that detecting if there's something to decode // is done by looking if dictionary become full // instead of looking if *in_pos == in_size. This // is because it is possible that all the input was // consumed already but some data is pending to be // written to the dictionary. if (ret != LZMA_OK || *out_pos == out_size || coder->dict.pos < coder->dict.size) return ret; } } } static lzma_ret lz_decode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { lzma_coder *coder = coder_ptr; if (coder->next.code == NULL) return decode_buffer(coder, in, in_pos, in_size, out, out_pos, out_size); // We aren't the last coder in the chain, we need to decode // our input to a temporary buffer. while (*out_pos < out_size) { // Fill the temporary buffer if it is empty. if (!coder->next_finished && coder->temp.pos == coder->temp.size) { coder->temp.pos = 0; coder->temp.size = 0; const lzma_ret ret = coder->next.code( coder->next.coder, allocator, in, in_pos, in_size, coder->temp.buffer, &coder->temp.size, LZMA_BUFFER_SIZE, action); if (ret == LZMA_STREAM_END) coder->next_finished = true; else if (ret != LZMA_OK || coder->temp.size == 0) return ret; } if (coder->this_finished) { if (coder->temp.size != 0) return LZMA_DATA_ERROR; if (coder->next_finished) return LZMA_STREAM_END; return LZMA_OK; } const lzma_ret ret = decode_buffer(coder, coder->temp.buffer, &coder->temp.pos, coder->temp.size, out, out_pos, out_size); if (ret == LZMA_STREAM_END) coder->this_finished = true; else if (ret != LZMA_OK) return ret; else if (coder->next_finished && *out_pos < out_size) return LZMA_DATA_ERROR; } return LZMA_OK; } static void lz_decoder_end(void *coder_ptr, const lzma_allocator *allocator) { lzma_coder *coder = coder_ptr; lzma_next_end(&coder->next, allocator); lzma_free(coder->dict.buf, allocator); if (coder->lz.end != NULL) coder->lz.end(coder->lz.coder, allocator); else lzma_free(coder->lz.coder, allocator); lzma_free(coder, allocator); return; } extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters, lzma_ret (*lz_init)(lzma_lz_decoder *lz, const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options)) { // Allocate the base structure if it isn't already allocated. lzma_coder *coder = next->coder; if (coder == NULL) { coder = lzma_alloc(sizeof(lzma_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; next->coder = coder; next->code = &lz_decode; next->end = &lz_decoder_end; coder->dict.buf = NULL; coder->dict.size = 0; coder->lz = LZMA_LZ_DECODER_INIT; coder->next = LZMA_NEXT_CODER_INIT; } // Allocate and initialize the LZ-based decoder. It will also give // us the dictionary size. lzma_lz_options lz_options; return_if_error(lz_init(&coder->lz, allocator, filters[0].options, &lz_options)); // If the dictionary size is very small, increase it to 4096 bytes. // This is to prevent constant wrapping of the dictionary, which // would slow things down. The downside is that since we don't check // separately for the real dictionary size, we may happily accept // corrupt files. if (lz_options.dict_size < 4096) lz_options.dict_size = 4096; // Make dictionary size a multiple of 16. Some LZ-based decoders like // LZMA use the lowest bits lzma_dict.pos to know the alignment of the // data. Aligned buffer is also good when memcpying from the // dictionary to the output buffer, since applications are // recommended to give aligned buffers to liblzma. // // Avoid integer overflow. if (lz_options.dict_size > SIZE_MAX - 15) return LZMA_MEM_ERROR; lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15)); // Allocate and initialize the dictionary. if (coder->dict.size != lz_options.dict_size) { lzma_free(coder->dict.buf, allocator); coder->dict.buf = lzma_alloc(lz_options.dict_size, allocator); if (coder->dict.buf == NULL) return LZMA_MEM_ERROR; coder->dict.size = lz_options.dict_size; } lz_decoder_reset(next->coder); // Use the preset dictionary if it was given to us. if (lz_options.preset_dict != NULL && lz_options.preset_dict_size > 0) { // If the preset dictionary is bigger than the actual // dictionary, copy only the tail. const size_t copy_size = my_min(lz_options.preset_dict_size, lz_options.dict_size); const size_t offset = lz_options.preset_dict_size - copy_size; memcpy(coder->dict.buf, lz_options.preset_dict + offset, copy_size); coder->dict.pos = copy_size; coder->dict.full = copy_size; } // Miscellaneous initializations coder->next_finished = false; coder->this_finished = false; coder->temp.pos = 0; coder->temp.size = 0; // Initialize the next filter in the chain, if any. return lzma_next_filter_init(&coder->next, allocator, filters + 1); } extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size) { return sizeof(lzma_coder) + (uint64_t)(dictionary_size); } extern void -lzma_lz_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size) +lzma_lz_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size, + bool allow_eopm) { lzma_coder *coder = coder_ptr; - coder->lz.set_uncompressed(coder->lz.coder, uncompressed_size); + + if (uncompressed_size == LZMA_VLI_UNKNOWN) + allow_eopm = true; + + coder->lz.set_uncompressed(coder->lz.coder, uncompressed_size, + allow_eopm); } diff --git a/contrib/xz/src/liblzma/lz/lz_decoder.h b/contrib/xz/src/liblzma/lz/lz_decoder.h index 754ccf37c6a4..e6d7ab2a4107 100644 --- a/contrib/xz/src/liblzma/lz/lz_decoder.h +++ b/contrib/xz/src/liblzma/lz/lz_decoder.h @@ -1,234 +1,236 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file lz_decoder.h /// \brief LZ out window /// // Authors: Igor Pavlov // Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_LZ_DECODER_H #define LZMA_LZ_DECODER_H #include "common.h" typedef struct { /// Pointer to the dictionary buffer. It can be an allocated buffer /// internal to liblzma, or it can a be a buffer given by the /// application when in single-call mode (not implemented yet). uint8_t *buf; /// Write position in dictionary. The next byte will be written to /// buf[pos]. size_t pos; /// Indicates how full the dictionary is. This is used by /// dict_is_distance_valid() to detect corrupt files that would /// read beyond the beginning of the dictionary. size_t full; /// Write limit size_t limit; /// Size of the dictionary size_t size; /// True when dictionary should be reset before decoding more data. bool need_reset; } lzma_dict; typedef struct { size_t dict_size; const uint8_t *preset_dict; size_t preset_dict_size; } lzma_lz_options; typedef struct { /// Data specific to the LZ-based decoder void *coder; /// Function to decode from in[] to *dict lzma_ret (*code)(void *coder, lzma_dict *restrict dict, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size); void (*reset)(void *coder, const void *options); - /// Set the uncompressed size - void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size); + /// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN + /// then allow_eopm will always be true. + void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size, + bool allow_eopm); /// Free allocated resources void (*end)(void *coder, const lzma_allocator *allocator); } lzma_lz_decoder; #define LZMA_LZ_DECODER_INIT \ (lzma_lz_decoder){ \ .coder = NULL, \ .code = NULL, \ .reset = NULL, \ .set_uncompressed = NULL, \ .end = NULL, \ } extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters, lzma_ret (*lz_init)(lzma_lz_decoder *lz, const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options)); extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size); extern void lzma_lz_decoder_uncompressed( - void *coder, lzma_vli uncompressed_size); + void *coder, lzma_vli uncompressed_size, bool allow_eopm); ////////////////////// // Inline functions // ////////////////////// /// Get a byte from the history buffer. static inline uint8_t dict_get(const lzma_dict *const dict, const uint32_t distance) { return dict->buf[dict->pos - distance - 1 + (distance < dict->pos ? 0 : dict->size)]; } /// Test if dictionary is empty. static inline bool dict_is_empty(const lzma_dict *const dict) { return dict->full == 0; } /// Validate the match distance static inline bool dict_is_distance_valid(const lzma_dict *const dict, const size_t distance) { return dict->full > distance; } /// Repeat *len bytes at distance. static inline bool dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len) { // Don't write past the end of the dictionary. const size_t dict_avail = dict->limit - dict->pos; uint32_t left = my_min(dict_avail, *len); *len -= left; // Repeat a block of data from the history. Because memcpy() is faster // than copying byte by byte in a loop, the copying process gets split // into three cases. if (distance < left) { // Source and target areas overlap, thus we can't use // memcpy() nor even memmove() safely. do { dict->buf[dict->pos] = dict_get(dict, distance); ++dict->pos; } while (--left > 0); } else if (distance < dict->pos) { // The easiest and fastest case memcpy(dict->buf + dict->pos, dict->buf + dict->pos - distance - 1, left); dict->pos += left; } else { // The bigger the dictionary, the more rare this // case occurs. We need to "wrap" the dict, thus // we might need two memcpy() to copy all the data. assert(dict->full == dict->size); const uint32_t copy_pos = dict->pos - distance - 1 + dict->size; uint32_t copy_size = dict->size - copy_pos; if (copy_size < left) { memmove(dict->buf + dict->pos, dict->buf + copy_pos, copy_size); dict->pos += copy_size; copy_size = left - copy_size; memcpy(dict->buf + dict->pos, dict->buf, copy_size); dict->pos += copy_size; } else { memmove(dict->buf + dict->pos, dict->buf + copy_pos, left); dict->pos += left; } } // Update how full the dictionary is. if (dict->full < dict->pos) dict->full = dict->pos; return unlikely(*len != 0); } /// Puts one byte into the dictionary. Returns true if the dictionary was /// already full and the byte couldn't be added. static inline bool dict_put(lzma_dict *dict, uint8_t byte) { if (unlikely(dict->pos == dict->limit)) return true; dict->buf[dict->pos++] = byte; if (dict->pos > dict->full) dict->full = dict->pos; return false; } /// Copies arbitrary amount of data into the dictionary. static inline void dict_write(lzma_dict *restrict dict, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, size_t *restrict left) { // NOTE: If we are being given more data than the size of the // dictionary, it could be possible to optimize the LZ decoder // so that not everything needs to go through the dictionary. // This shouldn't be very common thing in practice though, and // the slowdown of one extra memcpy() isn't bad compared to how // much time it would have taken if the data were compressed. if (in_size - *in_pos > *left) in_size = *in_pos + *left; *left -= lzma_bufcpy(in, in_pos, in_size, dict->buf, &dict->pos, dict->limit); if (dict->pos > dict->full) dict->full = dict->pos; return; } static inline void dict_reset(lzma_dict *dict) { dict->need_reset = true; return; } #endif diff --git a/contrib/xz/src/liblzma/lz/lz_encoder.c b/contrib/xz/src/liblzma/lz/lz_encoder.c index 9a74b7c47ce8..e6a49fb19e31 100644 --- a/contrib/xz/src/liblzma/lz/lz_encoder.c +++ b/contrib/xz/src/liblzma/lz/lz_encoder.c @@ -1,616 +1,612 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file lz_encoder.c /// \brief LZ in window /// // Authors: Igor Pavlov // Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "lz_encoder.h" #include "lz_encoder_hash.h" // See lz_encoder_hash.h. This is a bit hackish but avoids making // endianness a conditional in makefiles. #if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL) # include "lz_encoder_hash_table.h" #endif #include "memcmplen.h" typedef struct { /// LZ-based encoder e.g. LZMA lzma_lz_encoder lz; /// History buffer and match finder lzma_mf mf; /// Next coder in the chain lzma_next_coder next; } lzma_coder; /// \brief Moves the data in the input window to free space for new data /// /// mf->buffer is a sliding input window, which keeps mf->keep_size_before /// bytes of input history available all the time. Now and then we need to /// "slide" the buffer to make space for the new data to the end of the /// buffer. At the same time, data older than keep_size_before is dropped. /// static void move_window(lzma_mf *mf) { // Align the move to a multiple of 16 bytes. Some LZ-based encoders // like LZMA use the lowest bits of mf->read_pos to know the // alignment of the uncompressed data. We also get better speed // for memmove() with aligned buffers. assert(mf->read_pos > mf->keep_size_before); const uint32_t move_offset = (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15); assert(mf->write_pos > move_offset); const size_t move_size = mf->write_pos - move_offset; assert(move_offset + move_size <= mf->size); memmove(mf->buffer, mf->buffer + move_offset, move_size); mf->offset += move_offset; mf->read_pos -= move_offset; mf->read_limit -= move_offset; mf->write_pos -= move_offset; return; } /// \brief Tries to fill the input window (mf->buffer) /// /// If we are the last encoder in the chain, our input data is in in[]. /// Otherwise we call the next filter in the chain to process in[] and /// write its output to mf->buffer. /// /// This function must not be called once it has returned LZMA_STREAM_END. /// static lzma_ret fill_window(lzma_coder *coder, const lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, lzma_action action) { assert(coder->mf.read_pos <= coder->mf.write_pos); // Move the sliding window if needed. if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after) move_window(&coder->mf); // Maybe this is ugly, but lzma_mf uses uint32_t for most things // (which I find cleanest), but we need size_t here when filling // the history window. size_t write_pos = coder->mf.write_pos; lzma_ret ret; if (coder->next.code == NULL) { // Not using a filter, simply memcpy() as much as possible. lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer, &write_pos, coder->mf.size); ret = action != LZMA_RUN && *in_pos == in_size ? LZMA_STREAM_END : LZMA_OK; } else { ret = coder->next.code(coder->next.coder, allocator, in, in_pos, in_size, coder->mf.buffer, &write_pos, coder->mf.size, action); } coder->mf.write_pos = write_pos; // Silence Valgrind. lzma_memcmplen() can read extra bytes // and Valgrind will give warnings if those bytes are uninitialized // because Valgrind cannot see that the values of the uninitialized // bytes are eventually ignored. memzero(coder->mf.buffer + write_pos, LZMA_MEMCMPLEN_EXTRA); // If end of stream has been reached or flushing completed, we allow // the encoder to process all the input (that is, read_pos is allowed // to reach write_pos). Otherwise we keep keep_size_after bytes // available as prebuffer. if (ret == LZMA_STREAM_END) { assert(*in_pos == in_size); ret = LZMA_OK; coder->mf.action = action; coder->mf.read_limit = coder->mf.write_pos; } else if (coder->mf.write_pos > coder->mf.keep_size_after) { // This needs to be done conditionally, because if we got // only little new input, there may be too little input // to do any encoding yet. coder->mf.read_limit = coder->mf.write_pos - coder->mf.keep_size_after; } // Restart the match finder after finished LZMA_SYNC_FLUSH. if (coder->mf.pending > 0 && coder->mf.read_pos < coder->mf.read_limit) { // Match finder may update coder->pending and expects it to // start from zero, so use a temporary variable. const uint32_t pending = coder->mf.pending; coder->mf.pending = 0; // Rewind read_pos so that the match finder can hash // the pending bytes. assert(coder->mf.read_pos >= pending); coder->mf.read_pos -= pending; // Call the skip function directly instead of using // mf_skip(), since we don't want to touch mf->read_ahead. coder->mf.skip(&coder->mf, pending); } return ret; } static lzma_ret lz_encode(void *coder_ptr, const lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { lzma_coder *coder = coder_ptr; while (*out_pos < out_size && (*in_pos < in_size || action != LZMA_RUN)) { // Read more data to coder->mf.buffer if needed. if (coder->mf.action == LZMA_RUN && coder->mf.read_pos >= coder->mf.read_limit) return_if_error(fill_window(coder, allocator, in, in_pos, in_size, action)); // Encode const lzma_ret ret = coder->lz.code(coder->lz.coder, &coder->mf, out, out_pos, out_size); if (ret != LZMA_OK) { // Setting this to LZMA_RUN for cases when we are // flushing. It doesn't matter when finishing or if // an error occurred. coder->mf.action = LZMA_RUN; return ret; } } return LZMA_OK; } static bool lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator, const lzma_lz_options *lz_options) { // For now, the dictionary size is limited to 1.5 GiB. This may grow // in the future if needed, but it needs a little more work than just // changing this check. if (lz_options->dict_size < LZMA_DICT_SIZE_MIN || lz_options->dict_size > (UINT32_C(1) << 30) + (UINT32_C(1) << 29) || lz_options->nice_len > lz_options->match_len_max) return true; mf->keep_size_before = lz_options->before_size + lz_options->dict_size; mf->keep_size_after = lz_options->after_size + lz_options->match_len_max; // To avoid constant memmove()s, allocate some extra space. Since // memmove()s become more expensive when the size of the buffer // increases, we reserve more space when a large dictionary is // used to make the memmove() calls rarer. // // This works with dictionaries up to about 3 GiB. If bigger // dictionary is wanted, some extra work is needed: // - Several variables in lzma_mf have to be changed from uint32_t // to size_t. // - Memory usage calculation needs something too, e.g. use uint64_t // for mf->size. uint32_t reserve = lz_options->dict_size / 2; if (reserve > (UINT32_C(1) << 30)) reserve /= 2; reserve += (lz_options->before_size + lz_options->match_len_max + lz_options->after_size) / 2 + (UINT32_C(1) << 19); const uint32_t old_size = mf->size; mf->size = mf->keep_size_before + reserve + mf->keep_size_after; // Deallocate the old history buffer if it exists but has different // size than what is needed now. if (mf->buffer != NULL && old_size != mf->size) { lzma_free(mf->buffer, allocator); mf->buffer = NULL; } // Match finder options mf->match_len_max = lz_options->match_len_max; mf->nice_len = lz_options->nice_len; // cyclic_size has to stay smaller than 2 Gi. Note that this doesn't // mean limiting dictionary size to less than 2 GiB. With a match // finder that uses multibyte resolution (hashes start at e.g. every // fourth byte), cyclic_size would stay below 2 Gi even when // dictionary size is greater than 2 GiB. // // It would be possible to allow cyclic_size >= 2 Gi, but then we // would need to be careful to use 64-bit types in various places // (size_t could do since we would need bigger than 32-bit address // space anyway). It would also require either zeroing a multigigabyte // buffer at initialization (waste of time and RAM) or allow // normalization in lz_encoder_mf.c to access uninitialized // memory to keep the code simpler. The current way is simple and // still allows pretty big dictionaries, so I don't expect these // limits to change. mf->cyclic_size = lz_options->dict_size + 1; // Validate the match finder ID and setup the function pointers. switch (lz_options->match_finder) { #ifdef HAVE_MF_HC3 case LZMA_MF_HC3: mf->find = &lzma_mf_hc3_find; mf->skip = &lzma_mf_hc3_skip; break; #endif #ifdef HAVE_MF_HC4 case LZMA_MF_HC4: mf->find = &lzma_mf_hc4_find; mf->skip = &lzma_mf_hc4_skip; break; #endif #ifdef HAVE_MF_BT2 case LZMA_MF_BT2: mf->find = &lzma_mf_bt2_find; mf->skip = &lzma_mf_bt2_skip; break; #endif #ifdef HAVE_MF_BT3 case LZMA_MF_BT3: mf->find = &lzma_mf_bt3_find; mf->skip = &lzma_mf_bt3_skip; break; #endif #ifdef HAVE_MF_BT4 case LZMA_MF_BT4: mf->find = &lzma_mf_bt4_find; mf->skip = &lzma_mf_bt4_skip; break; #endif default: return true; } // Calculate the sizes of mf->hash and mf->son and check that // nice_len is big enough for the selected match finder. const uint32_t hash_bytes = lz_options->match_finder & 0x0F; if (hash_bytes > mf->nice_len) return true; const bool is_bt = (lz_options->match_finder & 0x10) != 0; uint32_t hs; if (hash_bytes == 2) { hs = 0xFFFF; } else { // Round dictionary size up to the next 2^n - 1 so it can // be used as a hash mask. hs = lz_options->dict_size - 1; hs |= hs >> 1; hs |= hs >> 2; hs |= hs >> 4; hs |= hs >> 8; hs >>= 1; hs |= 0xFFFF; if (hs > (UINT32_C(1) << 24)) { if (hash_bytes == 3) hs = (UINT32_C(1) << 24) - 1; else hs >>= 1; } } mf->hash_mask = hs; ++hs; if (hash_bytes > 2) hs += HASH_2_SIZE; if (hash_bytes > 3) hs += HASH_3_SIZE; /* No match finder uses this at the moment. if (mf->hash_bytes > 4) hs += HASH_4_SIZE; */ const uint32_t old_hash_count = mf->hash_count; const uint32_t old_sons_count = mf->sons_count; mf->hash_count = hs; mf->sons_count = mf->cyclic_size; if (is_bt) mf->sons_count *= 2; // Deallocate the old hash array if it exists and has different size // than what is needed now. if (old_hash_count != mf->hash_count || old_sons_count != mf->sons_count) { lzma_free(mf->hash, allocator); mf->hash = NULL; lzma_free(mf->son, allocator); mf->son = NULL; } // Maximum number of match finder cycles mf->depth = lz_options->depth; if (mf->depth == 0) { if (is_bt) mf->depth = 16 + mf->nice_len / 2; else mf->depth = 4 + mf->nice_len / 4; } return false; } static bool lz_encoder_init(lzma_mf *mf, const lzma_allocator *allocator, const lzma_lz_options *lz_options) { // Allocate the history buffer. if (mf->buffer == NULL) { // lzma_memcmplen() is used for the dictionary buffer // so we need to allocate a few extra bytes to prevent // it from reading past the end of the buffer. mf->buffer = lzma_alloc(mf->size + LZMA_MEMCMPLEN_EXTRA, allocator); if (mf->buffer == NULL) return true; // Keep Valgrind happy with lzma_memcmplen() and initialize // the extra bytes whose value may get read but which will // effectively get ignored. memzero(mf->buffer + mf->size, LZMA_MEMCMPLEN_EXTRA); } // Use cyclic_size as initial mf->offset. This allows // avoiding a few branches in the match finders. The downside is // that match finder needs to be normalized more often, which may // hurt performance with huge dictionaries. mf->offset = mf->cyclic_size; mf->read_pos = 0; mf->read_ahead = 0; mf->read_limit = 0; mf->write_pos = 0; mf->pending = 0; #if UINT32_MAX >= SIZE_MAX / 4 // Check for integer overflow. (Huge dictionaries are not // possible on 32-bit CPU.) if (mf->hash_count > SIZE_MAX / sizeof(uint32_t) || mf->sons_count > SIZE_MAX / sizeof(uint32_t)) return true; #endif // Allocate and initialize the hash table. Since EMPTY_HASH_VALUE // is zero, we can use lzma_alloc_zero() or memzero() for mf->hash. // // We don't need to initialize mf->son, but not doing that may // make Valgrind complain in normalization (see normalize() in // lz_encoder_mf.c). Skipping the initialization is *very* good // when big dictionary is used but only small amount of data gets // actually compressed: most of the mf->son won't get actually // allocated by the kernel, so we avoid wasting RAM and improve // initialization speed a lot. if (mf->hash == NULL) { mf->hash = lzma_alloc_zero(mf->hash_count * sizeof(uint32_t), allocator); mf->son = lzma_alloc(mf->sons_count * sizeof(uint32_t), allocator); if (mf->hash == NULL || mf->son == NULL) { lzma_free(mf->hash, allocator); mf->hash = NULL; lzma_free(mf->son, allocator); mf->son = NULL; return true; } } else { /* for (uint32_t i = 0; i < mf->hash_count; ++i) mf->hash[i] = EMPTY_HASH_VALUE; */ memzero(mf->hash, mf->hash_count * sizeof(uint32_t)); } mf->cyclic_pos = 0; // Handle preset dictionary. if (lz_options->preset_dict != NULL && lz_options->preset_dict_size > 0) { // If the preset dictionary is bigger than the actual // dictionary, use only the tail. mf->write_pos = my_min(lz_options->preset_dict_size, mf->size); memcpy(mf->buffer, lz_options->preset_dict + lz_options->preset_dict_size - mf->write_pos, mf->write_pos); mf->action = LZMA_SYNC_FLUSH; mf->skip(mf, mf->write_pos); } mf->action = LZMA_RUN; return false; } extern uint64_t lzma_lz_encoder_memusage(const lzma_lz_options *lz_options) { // Old buffers must not exist when calling lz_encoder_prepare(). lzma_mf mf = { .buffer = NULL, .hash = NULL, .son = NULL, .hash_count = 0, .sons_count = 0, }; // Setup the size information into mf. if (lz_encoder_prepare(&mf, NULL, lz_options)) return UINT64_MAX; // Calculate the memory usage. return ((uint64_t)(mf.hash_count) + mf.sons_count) * sizeof(uint32_t) + mf.size + sizeof(lzma_coder); } static void lz_encoder_end(void *coder_ptr, const lzma_allocator *allocator) { lzma_coder *coder = coder_ptr; lzma_next_end(&coder->next, allocator); lzma_free(coder->mf.son, allocator); lzma_free(coder->mf.hash, allocator); lzma_free(coder->mf.buffer, allocator); if (coder->lz.end != NULL) coder->lz.end(coder->lz.coder, allocator); else lzma_free(coder->lz.coder, allocator); lzma_free(coder, allocator); return; } static lzma_ret lz_encoder_update(void *coder_ptr, const lzma_allocator *allocator, const lzma_filter *filters_null lzma_attribute((__unused__)), const lzma_filter *reversed_filters) { lzma_coder *coder = coder_ptr; if (coder->lz.options_update == NULL) return LZMA_PROG_ERROR; return_if_error(coder->lz.options_update( coder->lz.coder, reversed_filters)); return lzma_next_filter_update( &coder->next, allocator, reversed_filters + 1); } extern lzma_ret lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters, lzma_ret (*lz_init)(lzma_lz_encoder *lz, const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options)) { #ifdef HAVE_SMALL // We need that the CRC32 table has been initialized. lzma_crc32_init(); #endif // Allocate and initialize the base data structure. lzma_coder *coder = next->coder; if (coder == NULL) { coder = lzma_alloc(sizeof(lzma_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; next->coder = coder; next->code = &lz_encode; next->end = &lz_encoder_end; next->update = &lz_encoder_update; coder->lz.coder = NULL; coder->lz.code = NULL; coder->lz.end = NULL; // mf.size is initialized to silence Valgrind // when used on optimized binaries (GCC may reorder // code in a way that Valgrind gets unhappy). coder->mf.buffer = NULL; coder->mf.size = 0; coder->mf.hash = NULL; coder->mf.son = NULL; coder->mf.hash_count = 0; coder->mf.sons_count = 0; coder->next = LZMA_NEXT_CODER_INIT; } // Initialize the LZ-based encoder. lzma_lz_options lz_options; return_if_error(lz_init(&coder->lz, allocator, filters[0].options, &lz_options)); // Setup the size information into coder->mf and deallocate // old buffers if they have wrong size. if (lz_encoder_prepare(&coder->mf, allocator, &lz_options)) return LZMA_OPTIONS_ERROR; // Allocate new buffers if needed, and do the rest of // the initialization. if (lz_encoder_init(&coder->mf, allocator, &lz_options)) return LZMA_MEM_ERROR; // Initialize the next filter in the chain, if any. return lzma_next_filter_init(&coder->next, allocator, filters + 1); } extern LZMA_API(lzma_bool) lzma_mf_is_supported(lzma_match_finder mf) { - bool ret = false; - + switch (mf) { #ifdef HAVE_MF_HC3 - if (mf == LZMA_MF_HC3) - ret = true; + case LZMA_MF_HC3: + return true; #endif - #ifdef HAVE_MF_HC4 - if (mf == LZMA_MF_HC4) - ret = true; + case LZMA_MF_HC4: + return true; #endif - #ifdef HAVE_MF_BT2 - if (mf == LZMA_MF_BT2) - ret = true; + case LZMA_MF_BT2: + return true; #endif - #ifdef HAVE_MF_BT3 - if (mf == LZMA_MF_BT3) - ret = true; + case LZMA_MF_BT3: + return true; #endif - #ifdef HAVE_MF_BT4 - if (mf == LZMA_MF_BT4) - ret = true; + case LZMA_MF_BT4: + return true; #endif - - return ret; + default: + return false; + } } diff --git a/contrib/xz/src/liblzma/lzma/lzma2_decoder.c b/contrib/xz/src/liblzma/lzma/lzma2_decoder.c index cf1b5110aca8..105a28dc97ed 100644 --- a/contrib/xz/src/liblzma/lzma/lzma2_decoder.c +++ b/contrib/xz/src/liblzma/lzma/lzma2_decoder.c @@ -1,310 +1,310 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file lzma2_decoder.c /// \brief LZMA2 decoder /// // Authors: Igor Pavlov // Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "lzma2_decoder.h" #include "lz_decoder.h" #include "lzma_decoder.h" typedef struct { enum sequence { SEQ_CONTROL, SEQ_UNCOMPRESSED_1, SEQ_UNCOMPRESSED_2, SEQ_COMPRESSED_0, SEQ_COMPRESSED_1, SEQ_PROPERTIES, SEQ_LZMA, SEQ_COPY, } sequence; /// Sequence after the size fields have been decoded. enum sequence next_sequence; /// LZMA decoder lzma_lz_decoder lzma; /// Uncompressed size of LZMA chunk size_t uncompressed_size; /// Compressed size of the chunk (naturally equals to uncompressed /// size of uncompressed chunk) size_t compressed_size; /// True if properties are needed. This is false before the /// first LZMA chunk. bool need_properties; /// True if dictionary reset is needed. This is false before the /// first chunk (LZMA or uncompressed). bool need_dictionary_reset; lzma_options_lzma options; } lzma_lzma2_coder; static lzma_ret lzma2_decode(void *coder_ptr, lzma_dict *restrict dict, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size) { lzma_lzma2_coder *restrict coder = coder_ptr; // With SEQ_LZMA it is possible that no new input is needed to do // some progress. The rest of the sequences assume that there is // at least one byte of input. while (*in_pos < in_size || coder->sequence == SEQ_LZMA) switch (coder->sequence) { case SEQ_CONTROL: { const uint32_t control = in[*in_pos]; ++*in_pos; // End marker if (control == 0x00) return LZMA_STREAM_END; if (control >= 0xE0 || control == 1) { // Dictionary reset implies that next LZMA chunk has // to set new properties. coder->need_properties = true; coder->need_dictionary_reset = true; } else if (coder->need_dictionary_reset) { return LZMA_DATA_ERROR; } if (control >= 0x80) { // LZMA chunk. The highest five bits of the // uncompressed size are taken from the control byte. coder->uncompressed_size = (control & 0x1F) << 16; coder->sequence = SEQ_UNCOMPRESSED_1; // See if there are new properties or if we need to // reset the state. if (control >= 0xC0) { // When there are new properties, state reset // is done at SEQ_PROPERTIES. coder->need_properties = false; coder->next_sequence = SEQ_PROPERTIES; } else if (coder->need_properties) { return LZMA_DATA_ERROR; } else { coder->next_sequence = SEQ_LZMA; // If only state reset is wanted with old // properties, do the resetting here for // simplicity. if (control >= 0xA0) coder->lzma.reset(coder->lzma.coder, &coder->options); } } else { // Invalid control values if (control > 2) return LZMA_DATA_ERROR; // It's uncompressed chunk coder->sequence = SEQ_COMPRESSED_0; coder->next_sequence = SEQ_COPY; } if (coder->need_dictionary_reset) { // Finish the dictionary reset and let the caller // flush the dictionary to the actual output buffer. coder->need_dictionary_reset = false; dict_reset(dict); return LZMA_OK; } break; } case SEQ_UNCOMPRESSED_1: coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; coder->sequence = SEQ_UNCOMPRESSED_2; break; case SEQ_UNCOMPRESSED_2: coder->uncompressed_size += in[(*in_pos)++] + 1U; coder->sequence = SEQ_COMPRESSED_0; coder->lzma.set_uncompressed(coder->lzma.coder, - coder->uncompressed_size); + coder->uncompressed_size, false); break; case SEQ_COMPRESSED_0: coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; coder->sequence = SEQ_COMPRESSED_1; break; case SEQ_COMPRESSED_1: coder->compressed_size += in[(*in_pos)++] + 1U; coder->sequence = coder->next_sequence; break; case SEQ_PROPERTIES: if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) return LZMA_DATA_ERROR; coder->lzma.reset(coder->lzma.coder, &coder->options); coder->sequence = SEQ_LZMA; break; case SEQ_LZMA: { // Store the start offset so that we can update // coder->compressed_size later. const size_t in_start = *in_pos; // Decode from in[] to *dict. const lzma_ret ret = coder->lzma.code(coder->lzma.coder, dict, in, in_pos, in_size); // Validate and update coder->compressed_size. const size_t in_used = *in_pos - in_start; if (in_used > coder->compressed_size) return LZMA_DATA_ERROR; coder->compressed_size -= in_used; // Return if we didn't finish the chunk, or an error occurred. if (ret != LZMA_STREAM_END) return ret; // The LZMA decoder must have consumed the whole chunk now. // We don't need to worry about uncompressed size since it // is checked by the LZMA decoder. if (coder->compressed_size != 0) return LZMA_DATA_ERROR; coder->sequence = SEQ_CONTROL; break; } case SEQ_COPY: { // Copy from input to the dictionary as is. dict_write(dict, in, in_pos, in_size, &coder->compressed_size); if (coder->compressed_size != 0) return LZMA_OK; coder->sequence = SEQ_CONTROL; break; } default: assert(0); return LZMA_PROG_ERROR; } return LZMA_OK; } static void lzma2_decoder_end(void *coder_ptr, const lzma_allocator *allocator) { lzma_lzma2_coder *coder = coder_ptr; assert(coder->lzma.end == NULL); lzma_free(coder->lzma.coder, allocator); lzma_free(coder, allocator); return; } static lzma_ret lzma2_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator, const void *opt, lzma_lz_options *lz_options) { lzma_lzma2_coder *coder = lz->coder; if (coder == NULL) { coder = lzma_alloc(sizeof(lzma_lzma2_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; lz->coder = coder; lz->code = &lzma2_decode; lz->end = &lzma2_decoder_end; coder->lzma = LZMA_LZ_DECODER_INIT; } const lzma_options_lzma *options = opt; coder->sequence = SEQ_CONTROL; coder->need_properties = true; coder->need_dictionary_reset = options->preset_dict == NULL || options->preset_dict_size == 0; return lzma_lzma_decoder_create(&coder->lzma, allocator, options, lz_options); } extern lzma_ret lzma_lzma2_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters) { // LZMA2 can only be the last filter in the chain. This is enforced // by the raw_decoder initialization. assert(filters[1].init == NULL); return lzma_lz_decoder_init(next, allocator, filters, &lzma2_decoder_init); } extern uint64_t lzma_lzma2_decoder_memusage(const void *options) { return sizeof(lzma_lzma2_coder) + lzma_lzma_decoder_memusage_nocheck(options); } extern lzma_ret lzma_lzma2_props_decode(void **options, const lzma_allocator *allocator, const uint8_t *props, size_t props_size) { if (props_size != 1) return LZMA_OPTIONS_ERROR; // Check that reserved bits are unset. if (props[0] & 0xC0) return LZMA_OPTIONS_ERROR; // Decode the dictionary size. if (props[0] > 40) return LZMA_OPTIONS_ERROR; lzma_options_lzma *opt = lzma_alloc( sizeof(lzma_options_lzma), allocator); if (opt == NULL) return LZMA_MEM_ERROR; if (props[0] == 40) { opt->dict_size = UINT32_MAX; } else { opt->dict_size = 2 | (props[0] & 1U); opt->dict_size <<= props[0] / 2U + 11; } opt->preset_dict = NULL; opt->preset_dict_size = 0; *options = opt; return LZMA_OK; } diff --git a/contrib/xz/src/liblzma/lzma/lzma2_encoder.c b/contrib/xz/src/liblzma/lzma/lzma2_encoder.c index 63588ee30c6b..6914f2793b26 100644 --- a/contrib/xz/src/liblzma/lzma/lzma2_encoder.c +++ b/contrib/xz/src/liblzma/lzma/lzma2_encoder.c @@ -1,410 +1,413 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file lzma2_encoder.c /// \brief LZMA2 encoder /// // Authors: Igor Pavlov // Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "lz_encoder.h" #include "lzma_encoder.h" #include "fastpos.h" #include "lzma2_encoder.h" typedef struct { enum { SEQ_INIT, SEQ_LZMA_ENCODE, SEQ_LZMA_COPY, SEQ_UNCOMPRESSED_HEADER, SEQ_UNCOMPRESSED_COPY, } sequence; /// LZMA encoder void *lzma; /// LZMA options currently in use. lzma_options_lzma opt_cur; bool need_properties; bool need_state_reset; bool need_dictionary_reset; /// Uncompressed size of a chunk size_t uncompressed_size; /// Compressed size of a chunk (excluding headers); this is also used /// to indicate the end of buf[] in SEQ_LZMA_COPY. size_t compressed_size; /// Read position in buf[] size_t buf_pos; /// Buffer to hold the chunk header and LZMA compressed data uint8_t buf[LZMA2_HEADER_MAX + LZMA2_CHUNK_MAX]; } lzma_lzma2_coder; static void lzma2_header_lzma(lzma_lzma2_coder *coder) { assert(coder->uncompressed_size > 0); assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); assert(coder->compressed_size > 0); assert(coder->compressed_size <= LZMA2_CHUNK_MAX); size_t pos; if (coder->need_properties) { pos = 0; if (coder->need_dictionary_reset) coder->buf[pos] = 0x80 + (3 << 5); else coder->buf[pos] = 0x80 + (2 << 5); } else { pos = 1; if (coder->need_state_reset) coder->buf[pos] = 0x80 + (1 << 5); else coder->buf[pos] = 0x80; } // Set the start position for copying. coder->buf_pos = pos; // Uncompressed size size_t size = coder->uncompressed_size - 1; coder->buf[pos++] += size >> 16; coder->buf[pos++] = (size >> 8) & 0xFF; coder->buf[pos++] = size & 0xFF; // Compressed size size = coder->compressed_size - 1; coder->buf[pos++] = size >> 8; coder->buf[pos++] = size & 0xFF; // Properties, if needed if (coder->need_properties) lzma_lzma_lclppb_encode(&coder->opt_cur, coder->buf + pos); coder->need_properties = false; coder->need_state_reset = false; coder->need_dictionary_reset = false; // The copying code uses coder->compressed_size to indicate the end // of coder->buf[], so we need add the maximum size of the header here. coder->compressed_size += LZMA2_HEADER_MAX; return; } static void lzma2_header_uncompressed(lzma_lzma2_coder *coder) { assert(coder->uncompressed_size > 0); assert(coder->uncompressed_size <= LZMA2_CHUNK_MAX); // If this is the first chunk, we need to include dictionary // reset indicator. if (coder->need_dictionary_reset) coder->buf[0] = 1; else coder->buf[0] = 2; coder->need_dictionary_reset = false; // "Compressed" size coder->buf[1] = (coder->uncompressed_size - 1) >> 8; coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF; // Set the start position for copying. coder->buf_pos = 0; return; } static lzma_ret lzma2_encode(void *coder_ptr, lzma_mf *restrict mf, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size) { lzma_lzma2_coder *restrict coder = coder_ptr; while (*out_pos < out_size) switch (coder->sequence) { case SEQ_INIT: // If there's no input left and we are flushing or finishing, // don't start a new chunk. if (mf_unencoded(mf) == 0) { // Write end of payload marker if finishing. if (mf->action == LZMA_FINISH) out[(*out_pos)++] = 0; return mf->action == LZMA_RUN ? LZMA_OK : LZMA_STREAM_END; } if (coder->need_state_reset) return_if_error(lzma_lzma_encoder_reset( coder->lzma, &coder->opt_cur)); coder->uncompressed_size = 0; coder->compressed_size = 0; coder->sequence = SEQ_LZMA_ENCODE; // Fall through case SEQ_LZMA_ENCODE: { // Calculate how much more uncompressed data this chunk // could accept. const uint32_t left = LZMA2_UNCOMPRESSED_MAX - coder->uncompressed_size; uint32_t limit; if (left < mf->match_len_max) { // Must flush immediately since the next LZMA symbol // could make the uncompressed size of the chunk too // big. limit = 0; } else { // Calculate maximum read_limit that is OK from point // of view of LZMA2 chunk size. limit = mf->read_pos - mf->read_ahead + left - mf->match_len_max; } // Save the start position so that we can update // coder->uncompressed_size. const uint32_t read_start = mf->read_pos - mf->read_ahead; // Call the LZMA encoder until the chunk is finished. const lzma_ret ret = lzma_lzma_encode(coder->lzma, mf, coder->buf + LZMA2_HEADER_MAX, &coder->compressed_size, LZMA2_CHUNK_MAX, limit); coder->uncompressed_size += mf->read_pos - mf->read_ahead - read_start; assert(coder->compressed_size <= LZMA2_CHUNK_MAX); assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); if (ret != LZMA_STREAM_END) return LZMA_OK; // See if the chunk compressed. If it didn't, we encode it // as uncompressed chunk. This saves a few bytes of space // and makes decoding faster. if (coder->compressed_size >= coder->uncompressed_size) { coder->uncompressed_size += mf->read_ahead; assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); mf->read_ahead = 0; lzma2_header_uncompressed(coder); coder->need_state_reset = true; coder->sequence = SEQ_UNCOMPRESSED_HEADER; break; } // The chunk did compress at least by one byte, so we store // the chunk as LZMA. lzma2_header_lzma(coder); coder->sequence = SEQ_LZMA_COPY; } // Fall through case SEQ_LZMA_COPY: // Copy the compressed chunk along its headers to the // output buffer. lzma_bufcpy(coder->buf, &coder->buf_pos, coder->compressed_size, out, out_pos, out_size); if (coder->buf_pos != coder->compressed_size) return LZMA_OK; coder->sequence = SEQ_INIT; break; case SEQ_UNCOMPRESSED_HEADER: // Copy the three-byte header to indicate uncompressed chunk. lzma_bufcpy(coder->buf, &coder->buf_pos, LZMA2_HEADER_UNCOMPRESSED, out, out_pos, out_size); if (coder->buf_pos != LZMA2_HEADER_UNCOMPRESSED) return LZMA_OK; coder->sequence = SEQ_UNCOMPRESSED_COPY; // Fall through case SEQ_UNCOMPRESSED_COPY: // Copy the uncompressed data as is from the dictionary // to the output buffer. mf_read(mf, out, out_pos, out_size, &coder->uncompressed_size); if (coder->uncompressed_size != 0) return LZMA_OK; coder->sequence = SEQ_INIT; break; } return LZMA_OK; } static void lzma2_encoder_end(void *coder_ptr, const lzma_allocator *allocator) { lzma_lzma2_coder *coder = coder_ptr; lzma_free(coder->lzma, allocator); lzma_free(coder, allocator); return; } static lzma_ret lzma2_encoder_options_update(void *coder_ptr, const lzma_filter *filter) { lzma_lzma2_coder *coder = coder_ptr; // New options can be set only when there is no incomplete chunk. // This is the case at the beginning of the raw stream and right // after LZMA_SYNC_FLUSH. if (filter->options == NULL || coder->sequence != SEQ_INIT) return LZMA_PROG_ERROR; // Look if there are new options. At least for now, // only lc/lp/pb can be changed. const lzma_options_lzma *opt = filter->options; if (coder->opt_cur.lc != opt->lc || coder->opt_cur.lp != opt->lp || coder->opt_cur.pb != opt->pb) { // Validate the options. if (opt->lc > LZMA_LCLP_MAX || opt->lp > LZMA_LCLP_MAX || opt->lc + opt->lp > LZMA_LCLP_MAX || opt->pb > LZMA_PB_MAX) return LZMA_OPTIONS_ERROR; // The new options will be used when the encoder starts // a new LZMA2 chunk. coder->opt_cur.lc = opt->lc; coder->opt_cur.lp = opt->lp; coder->opt_cur.pb = opt->pb; coder->need_properties = true; coder->need_state_reset = true; } return LZMA_OK; } static lzma_ret lzma2_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options) { if (options == NULL) return LZMA_PROG_ERROR; lzma_lzma2_coder *coder = lz->coder; if (coder == NULL) { coder = lzma_alloc(sizeof(lzma_lzma2_coder), allocator); if (coder == NULL) return LZMA_MEM_ERROR; lz->coder = coder; lz->code = &lzma2_encode; lz->end = &lzma2_encoder_end; lz->options_update = &lzma2_encoder_options_update; coder->lzma = NULL; } coder->opt_cur = *(const lzma_options_lzma *)(options); coder->sequence = SEQ_INIT; coder->need_properties = true; coder->need_state_reset = false; coder->need_dictionary_reset = coder->opt_cur.preset_dict == NULL || coder->opt_cur.preset_dict_size == 0; // Initialize LZMA encoder return_if_error(lzma_lzma_encoder_create(&coder->lzma, allocator, &coder->opt_cur, lz_options)); // Make sure that we will always have enough history available in // case we need to use uncompressed chunks. They are used when the // compressed size of a chunk is not smaller than the uncompressed // size, so we need to have at least LZMA2_COMPRESSED_MAX bytes // history available. if (lz_options->before_size + lz_options->dict_size < LZMA2_CHUNK_MAX) lz_options->before_size = LZMA2_CHUNK_MAX - lz_options->dict_size; return LZMA_OK; } extern lzma_ret lzma_lzma2_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters) { return lzma_lz_encoder_init( next, allocator, filters, &lzma2_encoder_init); } extern uint64_t lzma_lzma2_encoder_memusage(const void *options) { const uint64_t lzma_mem = lzma_lzma_encoder_memusage(options); if (lzma_mem == UINT64_MAX) return UINT64_MAX; return sizeof(lzma_lzma2_coder) + lzma_mem; } extern lzma_ret lzma_lzma2_props_encode(const void *options, uint8_t *out) { + if (options == NULL) + return LZMA_PROG_ERROR; + const lzma_options_lzma *const opt = options; uint32_t d = my_max(opt->dict_size, LZMA_DICT_SIZE_MIN); // Round up to the next 2^n - 1 or 2^n + 2^(n - 1) - 1 depending // on which one is the next: --d; d |= d >> 2; d |= d >> 3; d |= d >> 4; d |= d >> 8; d |= d >> 16; // Get the highest two bits using the proper encoding: if (d == UINT32_MAX) out[0] = 40; else out[0] = get_dist_slot(d + 1) - 24; return LZMA_OK; } extern uint64_t lzma_lzma2_block_size(const void *options) { const lzma_options_lzma *const opt = options; // Use at least 1 MiB to keep compression ratio better. return my_max((uint64_t)(opt->dict_size) * 3, UINT64_C(1) << 20); } diff --git a/contrib/xz/src/liblzma/lzma/lzma_decoder.c b/contrib/xz/src/liblzma/lzma/lzma_decoder.c index e605a0a916b2..f56ccaf77fd3 100644 --- a/contrib/xz/src/liblzma/lzma/lzma_decoder.c +++ b/contrib/xz/src/liblzma/lzma/lzma_decoder.c @@ -1,1064 +1,1110 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file lzma_decoder.c /// \brief LZMA decoder /// // Authors: Igor Pavlov // Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "lz_decoder.h" #include "lzma_common.h" #include "lzma_decoder.h" #include "range_decoder.h" // The macros unroll loops with switch statements. // Silence warnings about missing fall-through comments. #if TUKLIB_GNUC_REQ(7, 0) # pragma GCC diagnostic ignored "-Wimplicit-fallthrough" #endif #ifdef HAVE_SMALL // Macros for (somewhat) size-optimized code. #define seq_4(seq) seq #define seq_6(seq) seq #define seq_8(seq) seq #define seq_len(seq) \ seq ## _CHOICE, \ seq ## _CHOICE2, \ seq ## _BITTREE #define len_decode(target, ld, pos_state, seq) \ do { \ case seq ## _CHOICE: \ rc_if_0(ld.choice, seq ## _CHOICE) { \ rc_update_0(ld.choice); \ probs = ld.low[pos_state];\ limit = LEN_LOW_SYMBOLS; \ target = MATCH_LEN_MIN; \ } else { \ rc_update_1(ld.choice); \ case seq ## _CHOICE2: \ rc_if_0(ld.choice2, seq ## _CHOICE2) { \ rc_update_0(ld.choice2); \ probs = ld.mid[pos_state]; \ limit = LEN_MID_SYMBOLS; \ target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ } else { \ rc_update_1(ld.choice2); \ probs = ld.high; \ limit = LEN_HIGH_SYMBOLS; \ target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \ + LEN_MID_SYMBOLS; \ } \ } \ symbol = 1; \ case seq ## _BITTREE: \ do { \ rc_bit(probs[symbol], , , seq ## _BITTREE); \ } while (symbol < limit); \ target += symbol - limit; \ } while (0) #else // HAVE_SMALL // Unrolled versions #define seq_4(seq) \ seq ## 0, \ seq ## 1, \ seq ## 2, \ seq ## 3 #define seq_6(seq) \ seq ## 0, \ seq ## 1, \ seq ## 2, \ seq ## 3, \ seq ## 4, \ seq ## 5 #define seq_8(seq) \ seq ## 0, \ seq ## 1, \ seq ## 2, \ seq ## 3, \ seq ## 4, \ seq ## 5, \ seq ## 6, \ seq ## 7 #define seq_len(seq) \ seq ## _CHOICE, \ seq ## _LOW0, \ seq ## _LOW1, \ seq ## _LOW2, \ seq ## _CHOICE2, \ seq ## _MID0, \ seq ## _MID1, \ seq ## _MID2, \ seq ## _HIGH0, \ seq ## _HIGH1, \ seq ## _HIGH2, \ seq ## _HIGH3, \ seq ## _HIGH4, \ seq ## _HIGH5, \ seq ## _HIGH6, \ seq ## _HIGH7 #define len_decode(target, ld, pos_state, seq) \ do { \ symbol = 1; \ case seq ## _CHOICE: \ rc_if_0(ld.choice, seq ## _CHOICE) { \ rc_update_0(ld.choice); \ rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \ rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \ rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \ target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \ } else { \ rc_update_1(ld.choice); \ case seq ## _CHOICE2: \ rc_if_0(ld.choice2, seq ## _CHOICE2) { \ rc_update_0(ld.choice2); \ rc_bit_case(ld.mid[pos_state][symbol], , , \ seq ## _MID0); \ rc_bit_case(ld.mid[pos_state][symbol], , , \ seq ## _MID1); \ rc_bit_case(ld.mid[pos_state][symbol], , , \ seq ## _MID2); \ target = symbol - LEN_MID_SYMBOLS \ + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ } else { \ rc_update_1(ld.choice2); \ rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \ rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \ rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \ rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \ rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \ rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \ rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \ rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \ target = symbol - LEN_HIGH_SYMBOLS \ + MATCH_LEN_MIN \ + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ } \ } \ } while (0) #endif // HAVE_SMALL /// Length decoder probabilities; see comments in lzma_common.h. typedef struct { probability choice; probability choice2; probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; probability high[LEN_HIGH_SYMBOLS]; } lzma_length_decoder; typedef struct { /////////////////// // Probabilities // /////////////////// /// Literals; see comments in lzma_common.h. probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; /// If 1, it's a match. Otherwise it's a single 8-bit literal. probability is_match[STATES][POS_STATES_MAX]; /// If 1, it's a repeated match. The distance is one of rep0 .. rep3. probability is_rep[STATES]; /// If 0, distance of a repeated match is rep0. /// Otherwise check is_rep1. probability is_rep0[STATES]; /// If 0, distance of a repeated match is rep1. /// Otherwise check is_rep2. probability is_rep1[STATES]; /// If 0, distance of a repeated match is rep2. Otherwise it is rep3. probability is_rep2[STATES]; /// If 1, the repeated match has length of one byte. Otherwise /// the length is decoded from rep_len_decoder. probability is_rep0_long[STATES][POS_STATES_MAX]; /// Probability tree for the highest two bits of the match distance. /// There is a separate probability tree for match lengths of /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. probability dist_slot[DIST_STATES][DIST_SLOTS]; /// Probability trees for additional bits for match distance when the /// distance is in the range [4, 127]. probability pos_special[FULL_DISTANCES - DIST_MODEL_END]; /// Probability tree for the lowest four bits of a match distance /// that is equal to or greater than 128. probability pos_align[ALIGN_SIZE]; /// Length of a normal match lzma_length_decoder match_len_decoder; /// Length of a repeated match lzma_length_decoder rep_len_decoder; /////////////////// // Decoder state // /////////////////// // Range coder lzma_range_decoder rc; // Types of the most recently seen LZMA symbols lzma_lzma_state state; uint32_t rep0; ///< Distance of the latest match uint32_t rep1; ///< Distance of second latest match uint32_t rep2; ///< Distance of third latest match uint32_t rep3; ///< Distance of fourth latest match uint32_t pos_mask; // (1U << pb) - 1 uint32_t literal_context_bits; uint32_t literal_pos_mask; /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of /// payload marker is expected. lzma_vli uncompressed_size; + /// True if end of payload marker (EOPM) is allowed even when + /// uncompressed_size is known; false if EOPM must not be present. + /// This is ignored if uncompressed_size == LZMA_VLI_UNKNOWN. + bool allow_eopm; + //////////////////////////////// // State of incomplete symbol // //////////////////////////////// /// Position where to continue the decoder loop enum { SEQ_NORMALIZE, SEQ_IS_MATCH, seq_8(SEQ_LITERAL), seq_8(SEQ_LITERAL_MATCHED), SEQ_LITERAL_WRITE, SEQ_IS_REP, seq_len(SEQ_MATCH_LEN), seq_6(SEQ_DIST_SLOT), SEQ_DIST_MODEL, SEQ_DIRECT, seq_4(SEQ_ALIGN), SEQ_EOPM, SEQ_IS_REP0, SEQ_SHORTREP, SEQ_IS_REP0_LONG, SEQ_IS_REP1, SEQ_IS_REP2, seq_len(SEQ_REP_LEN), SEQ_COPY, } sequence; /// Base of the current probability tree probability *probs; /// Symbol being decoded. This is also used as an index variable in /// bittree decoders: probs[symbol] uint32_t symbol; /// Used as a loop termination condition on bittree decoders and /// direct bits decoder. uint32_t limit; /// Matched literal decoder: 0x100 or 0 to help avoiding branches. /// Bittree reverse decoders: Offset of the next bit: 1 << offset uint32_t offset; /// If decoding a literal: match byte. /// If decoding a match: length of the match. uint32_t len; } lzma_lzma1_decoder; static lzma_ret lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size) { lzma_lzma1_decoder *restrict coder = coder_ptr; //////////////////// // Initialization // //////////////////// { const lzma_ret ret = rc_read_init( &coder->rc, in, in_pos, in_size); if (ret != LZMA_STREAM_END) return ret; } /////////////// // Variables // /////////////// // Making local copies of often-used variables improves both // speed and readability. lzma_dict dict = *dictptr; const size_t dict_start = dict.pos; // Range decoder rc_to_local(coder->rc, *in_pos); // State uint32_t state = coder->state; uint32_t rep0 = coder->rep0; uint32_t rep1 = coder->rep1; uint32_t rep2 = coder->rep2; uint32_t rep3 = coder->rep3; const uint32_t pos_mask = coder->pos_mask; // These variables are actually needed only if we last time ran // out of input in the middle of the decoder loop. probability *probs = coder->probs; uint32_t symbol = coder->symbol; uint32_t limit = coder->limit; uint32_t offset = coder->offset; uint32_t len = coder->len; const uint32_t literal_pos_mask = coder->literal_pos_mask; const uint32_t literal_context_bits = coder->literal_context_bits; // Temporary variables uint32_t pos_state = dict.pos & pos_mask; lzma_ret ret = LZMA_OK; - // If uncompressed size is known, there must be no end of payload - // marker. - const bool no_eopm = coder->uncompressed_size - != LZMA_VLI_UNKNOWN; - if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos) + // This is true when the next LZMA symbol is allowed to be EOPM. + // That is, if this is false, then EOPM is considered + // an invalid symbol and we will return LZMA_DATA_ERROR. + // + // EOPM is always required (not just allowed) when + // the uncompressed size isn't known. When uncompressed size + // is known, eopm_is_valid may be set to true later. + bool eopm_is_valid = coder->uncompressed_size == LZMA_VLI_UNKNOWN; + + // If uncompressed size is known and there is enough output space + // to decode all the data, limit the available buffer space so that + // the main loop won't try to decode past the end of the stream. + bool might_finish_without_eopm = false; + if (coder->uncompressed_size != LZMA_VLI_UNKNOWN + && coder->uncompressed_size <= dict.limit - dict.pos) { dict.limit = dict.pos + (size_t)(coder->uncompressed_size); + might_finish_without_eopm = true; + } // The main decoder loop. The "switch" is used to restart the decoder at // correct location. Once restarted, the "switch" is no longer used. switch (coder->sequence) while (true) { // Calculate new pos_state. This is skipped on the first loop // since we already calculated it when setting up the local // variables. pos_state = dict.pos & pos_mask; case SEQ_NORMALIZE: case SEQ_IS_MATCH: - if (unlikely(no_eopm && dict.pos == dict.limit)) - break; + if (unlikely(might_finish_without_eopm + && dict.pos == dict.limit)) { + // In rare cases there is a useless byte that needs + // to be read anyway. + rc_normalize(SEQ_NORMALIZE); + + // If the range decoder state is such that we can + // be at the end of the LZMA stream, then the + // decoding is finished. + if (rc_is_finished(rc)) { + ret = LZMA_STREAM_END; + goto out; + } + + // If the caller hasn't allowed EOPM to be present + // together with known uncompressed size, then the + // LZMA stream is corrupt. + if (!coder->allow_eopm) { + ret = LZMA_DATA_ERROR; + goto out; + } + + // Otherwise continue decoding with the expectation + // that the next LZMA symbol is EOPM. + eopm_is_valid = true; + } rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) { rc_update_0(coder->is_match[state][pos_state]); // It's a literal i.e. a single 8-bit byte. probs = literal_subcoder(coder->literal, literal_context_bits, literal_pos_mask, dict.pos, dict_get(&dict, 0)); symbol = 1; if (is_literal_state(state)) { // Decode literal without match byte. #ifdef HAVE_SMALL case SEQ_LITERAL: do { rc_bit(probs[symbol], , , SEQ_LITERAL); } while (symbol < (1 << 8)); #else rc_bit_case(probs[symbol], , , SEQ_LITERAL0); rc_bit_case(probs[symbol], , , SEQ_LITERAL1); rc_bit_case(probs[symbol], , , SEQ_LITERAL2); rc_bit_case(probs[symbol], , , SEQ_LITERAL3); rc_bit_case(probs[symbol], , , SEQ_LITERAL4); rc_bit_case(probs[symbol], , , SEQ_LITERAL5); rc_bit_case(probs[symbol], , , SEQ_LITERAL6); rc_bit_case(probs[symbol], , , SEQ_LITERAL7); #endif } else { // Decode literal with match byte. // // We store the byte we compare against // ("match byte") to "len" to minimize the // number of variables we need to store // between decoder calls. len = (uint32_t)(dict_get(&dict, rep0)) << 1; // The usage of "offset" allows omitting some // branches, which should give tiny speed // improvement on some CPUs. "offset" gets // set to zero if match_bit didn't match. offset = 0x100; #ifdef HAVE_SMALL case SEQ_LITERAL_MATCHED: do { const uint32_t match_bit = len & offset; const uint32_t subcoder_index = offset + match_bit + symbol; rc_bit(probs[subcoder_index], offset &= ~match_bit, offset &= match_bit, SEQ_LITERAL_MATCHED); // It seems to be faster to do this // here instead of putting it to the // beginning of the loop and then // putting the "case" in the middle // of the loop. len <<= 1; } while (symbol < (1 << 8)); #else // Unroll the loop. uint32_t match_bit; uint32_t subcoder_index; # define d(seq) \ case seq: \ match_bit = len & offset; \ subcoder_index = offset + match_bit + symbol; \ rc_bit(probs[subcoder_index], \ offset &= ~match_bit, \ offset &= match_bit, \ seq) d(SEQ_LITERAL_MATCHED0); len <<= 1; d(SEQ_LITERAL_MATCHED1); len <<= 1; d(SEQ_LITERAL_MATCHED2); len <<= 1; d(SEQ_LITERAL_MATCHED3); len <<= 1; d(SEQ_LITERAL_MATCHED4); len <<= 1; d(SEQ_LITERAL_MATCHED5); len <<= 1; d(SEQ_LITERAL_MATCHED6); len <<= 1; d(SEQ_LITERAL_MATCHED7); # undef d #endif } //update_literal(state); // Use a lookup table to update to literal state, // since compared to other state updates, this would // need two branches. static const lzma_lzma_state next_state[] = { STATE_LIT_LIT, STATE_LIT_LIT, STATE_LIT_LIT, STATE_LIT_LIT, STATE_MATCH_LIT_LIT, STATE_REP_LIT_LIT, STATE_SHORTREP_LIT_LIT, STATE_MATCH_LIT, STATE_REP_LIT, STATE_SHORTREP_LIT, STATE_MATCH_LIT, STATE_REP_LIT }; state = next_state[state]; case SEQ_LITERAL_WRITE: if (unlikely(dict_put(&dict, symbol))) { coder->sequence = SEQ_LITERAL_WRITE; goto out; } continue; } // Instead of a new byte we are going to get a byte range // (distance and length) which will be repeated from our // output history. rc_update_1(coder->is_match[state][pos_state]); case SEQ_IS_REP: rc_if_0(coder->is_rep[state], SEQ_IS_REP) { // Not a repeated match rc_update_0(coder->is_rep[state]); update_match(state); // The latest three match distances are kept in // memory in case there are repeated matches. rep3 = rep2; rep2 = rep1; rep1 = rep0; // Decode the length of the match. len_decode(len, coder->match_len_decoder, pos_state, SEQ_MATCH_LEN); // Prepare to decode the highest two bits of the // match distance. probs = coder->dist_slot[get_dist_state(len)]; symbol = 1; #ifdef HAVE_SMALL case SEQ_DIST_SLOT: do { rc_bit(probs[symbol], , , SEQ_DIST_SLOT); } while (symbol < DIST_SLOTS); #else rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT0); rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT1); rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT2); rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT3); rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT4); rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT5); #endif // Get rid of the highest bit that was needed for // indexing of the probability array. symbol -= DIST_SLOTS; assert(symbol <= 63); if (symbol < DIST_MODEL_START) { // Match distances [0, 3] have only two bits. rep0 = symbol; } else { // Decode the lowest [1, 29] bits of // the match distance. limit = (symbol >> 1) - 1; assert(limit >= 1 && limit <= 30); rep0 = 2 + (symbol & 1); if (symbol < DIST_MODEL_END) { // Prepare to decode the low bits for // a distance of [4, 127]. assert(limit <= 5); rep0 <<= limit; assert(rep0 <= 96); // -1 is fine, because we start // decoding at probs[1], not probs[0]. // NOTE: This violates the C standard, // since we are doing pointer // arithmetic past the beginning of // the array. assert((int32_t)(rep0 - symbol - 1) >= -1); assert((int32_t)(rep0 - symbol - 1) <= 82); probs = coder->pos_special + rep0 - symbol - 1; symbol = 1; offset = 0; case SEQ_DIST_MODEL: #ifdef HAVE_SMALL do { rc_bit(probs[symbol], , rep0 += 1U << offset, SEQ_DIST_MODEL); } while (++offset < limit); #else switch (limit) { case 5: assert(offset == 0); rc_bit(probs[symbol], , rep0 += 1U, SEQ_DIST_MODEL); ++offset; --limit; case 4: rc_bit(probs[symbol], , rep0 += 1U << offset, SEQ_DIST_MODEL); ++offset; --limit; case 3: rc_bit(probs[symbol], , rep0 += 1U << offset, SEQ_DIST_MODEL); ++offset; --limit; case 2: rc_bit(probs[symbol], , rep0 += 1U << offset, SEQ_DIST_MODEL); ++offset; --limit; case 1: // We need "symbol" only for // indexing the probability // array, thus we can use // rc_bit_last() here to omit // the unneeded updating of // "symbol". rc_bit_last(probs[symbol], , rep0 += 1U << offset, SEQ_DIST_MODEL); } #endif } else { // The distance is >= 128. Decode the // lower bits without probabilities // except the lowest four bits. assert(symbol >= 14); assert(limit >= 6); limit -= ALIGN_BITS; assert(limit >= 2); case SEQ_DIRECT: // Not worth manual unrolling do { rc_direct(rep0, SEQ_DIRECT); } while (--limit > 0); // Decode the lowest four bits using // probabilities. rep0 <<= ALIGN_BITS; symbol = 1; #ifdef HAVE_SMALL offset = 0; case SEQ_ALIGN: do { rc_bit(coder->pos_align[ symbol], , rep0 += 1U << offset, SEQ_ALIGN); } while (++offset < ALIGN_BITS); #else case SEQ_ALIGN0: rc_bit(coder->pos_align[symbol], , rep0 += 1, SEQ_ALIGN0); case SEQ_ALIGN1: rc_bit(coder->pos_align[symbol], , rep0 += 2, SEQ_ALIGN1); case SEQ_ALIGN2: rc_bit(coder->pos_align[symbol], , rep0 += 4, SEQ_ALIGN2); case SEQ_ALIGN3: // Like in SEQ_DIST_MODEL, we don't // need "symbol" for anything else // than indexing the probability array. rc_bit_last(coder->pos_align[symbol], , rep0 += 8, SEQ_ALIGN3); #endif if (rep0 == UINT32_MAX) { // End of payload marker was - // found. It must not be - // present if uncompressed - // size is known. - if (coder->uncompressed_size - != LZMA_VLI_UNKNOWN) { + // found. It may only be + // present if + // - uncompressed size is + // unknown or + // - after known uncompressed + // size amount of bytes has + // been decompressed and + // caller has indicated + // that EOPM might be used + // (it's not allowed in + // LZMA2). + if (!eopm_is_valid) { ret = LZMA_DATA_ERROR; goto out; } case SEQ_EOPM: // LZMA1 stream with // end-of-payload marker. rc_normalize(SEQ_EOPM); - ret = LZMA_STREAM_END; + ret = rc_is_finished(rc) + ? LZMA_STREAM_END + : LZMA_DATA_ERROR; goto out; } } } // Validate the distance we just decoded. if (unlikely(!dict_is_distance_valid(&dict, rep0))) { ret = LZMA_DATA_ERROR; goto out; } } else { rc_update_1(coder->is_rep[state]); // Repeated match // // The match distance is a value that we have had // earlier. The latest four match distances are // available as rep0, rep1, rep2 and rep3. We will // now decode which of them is the new distance. // // There cannot be a match if we haven't produced // any output, so check that first. if (unlikely(!dict_is_distance_valid(&dict, 0))) { ret = LZMA_DATA_ERROR; goto out; } case SEQ_IS_REP0: rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) { rc_update_0(coder->is_rep0[state]); // The distance is rep0. case SEQ_IS_REP0_LONG: rc_if_0(coder->is_rep0_long[state][pos_state], SEQ_IS_REP0_LONG) { rc_update_0(coder->is_rep0_long[ state][pos_state]); update_short_rep(state); case SEQ_SHORTREP: if (unlikely(dict_put(&dict, dict_get( &dict, rep0)))) { coder->sequence = SEQ_SHORTREP; goto out; } continue; } // Repeating more than one byte at // distance of rep0. rc_update_1(coder->is_rep0_long[ state][pos_state]); } else { rc_update_1(coder->is_rep0[state]); case SEQ_IS_REP1: // The distance is rep1, rep2 or rep3. Once // we find out which one of these three, it // is stored to rep0 and rep1, rep2 and rep3 // are updated accordingly. rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) { rc_update_0(coder->is_rep1[state]); const uint32_t distance = rep1; rep1 = rep0; rep0 = distance; } else { rc_update_1(coder->is_rep1[state]); case SEQ_IS_REP2: rc_if_0(coder->is_rep2[state], SEQ_IS_REP2) { rc_update_0(coder->is_rep2[ state]); const uint32_t distance = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; } else { rc_update_1(coder->is_rep2[ state]); const uint32_t distance = rep3; rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; } } } update_long_rep(state); // Decode the length of the repeated match. len_decode(len, coder->rep_len_decoder, pos_state, SEQ_REP_LEN); } ///////////////////////////////// // Repeat from history buffer. // ///////////////////////////////// // The length is always between these limits. There is no way // to trigger the algorithm to set len outside this range. assert(len >= MATCH_LEN_MIN); assert(len <= MATCH_LEN_MAX); case SEQ_COPY: // Repeat len bytes from distance of rep0. if (unlikely(dict_repeat(&dict, rep0, &len))) { coder->sequence = SEQ_COPY; goto out; } } - rc_normalize(SEQ_NORMALIZE); - coder->sequence = SEQ_IS_MATCH; - out: // Save state // NOTE: Must not copy dict.limit. dictptr->pos = dict.pos; dictptr->full = dict.full; rc_from_local(coder->rc, *in_pos); coder->state = state; coder->rep0 = rep0; coder->rep1 = rep1; coder->rep2 = rep2; coder->rep3 = rep3; coder->probs = probs; coder->symbol = symbol; coder->limit = limit; coder->offset = offset; coder->len = len; // Update the remaining amount of uncompressed data if uncompressed // size was known. if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) { coder->uncompressed_size -= dict.pos - dict_start; - // Since there cannot be end of payload marker if the - // uncompressed size was known, we check here if we - // finished decoding. + // If we have gotten all the output but the decoder wants + // to write more output, the file is corrupt. There are + // three SEQ values where output is produced. if (coder->uncompressed_size == 0 && ret == LZMA_OK - && coder->sequence != SEQ_NORMALIZE) - ret = coder->sequence == SEQ_IS_MATCH - ? LZMA_STREAM_END : LZMA_DATA_ERROR; + && (coder->sequence == SEQ_LITERAL_WRITE + || coder->sequence == SEQ_SHORTREP + || coder->sequence == SEQ_COPY)) + ret = LZMA_DATA_ERROR; } - // We can do an additional check in the range decoder to catch some - // corrupted files. if (ret == LZMA_STREAM_END) { - if (!rc_is_finished(coder->rc)) - ret = LZMA_DATA_ERROR; - // Reset the range decoder so that it is ready to reinitialize // for a new LZMA2 chunk. rc_reset(coder->rc); + coder->sequence = SEQ_IS_MATCH; } return ret; } static void -lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size) +lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size, + bool allow_eopm) { lzma_lzma1_decoder *coder = coder_ptr; coder->uncompressed_size = uncompressed_size; + coder->allow_eopm = allow_eopm; } static void lzma_decoder_reset(void *coder_ptr, const void *opt) { lzma_lzma1_decoder *coder = coder_ptr; const lzma_options_lzma *options = opt; // NOTE: We assume that lc/lp/pb are valid since they were // successfully decoded with lzma_lzma_decode_properties(). // Calculate pos_mask. We don't need pos_bits as is for anything. coder->pos_mask = (1U << options->pb) - 1; // Initialize the literal decoder. literal_init(coder->literal, options->lc, options->lp); coder->literal_context_bits = options->lc; coder->literal_pos_mask = (1U << options->lp) - 1; // State coder->state = STATE_LIT_LIT; coder->rep0 = 0; coder->rep1 = 0; coder->rep2 = 0; coder->rep3 = 0; coder->pos_mask = (1U << options->pb) - 1; // Range decoder rc_reset(coder->rc); // Bit and bittree decoders for (uint32_t i = 0; i < STATES; ++i) { for (uint32_t j = 0; j <= coder->pos_mask; ++j) { bit_reset(coder->is_match[i][j]); bit_reset(coder->is_rep0_long[i][j]); } bit_reset(coder->is_rep[i]); bit_reset(coder->is_rep0[i]); bit_reset(coder->is_rep1[i]); bit_reset(coder->is_rep2[i]); } for (uint32_t i = 0; i < DIST_STATES; ++i) bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS); for (uint32_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i) bit_reset(coder->pos_special[i]); bittree_reset(coder->pos_align, ALIGN_BITS); // Len decoders (also bit/bittree) const uint32_t num_pos_states = 1U << options->pb; bit_reset(coder->match_len_decoder.choice); bit_reset(coder->match_len_decoder.choice2); bit_reset(coder->rep_len_decoder.choice); bit_reset(coder->rep_len_decoder.choice2); for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { bittree_reset(coder->match_len_decoder.low[pos_state], LEN_LOW_BITS); bittree_reset(coder->match_len_decoder.mid[pos_state], LEN_MID_BITS); bittree_reset(coder->rep_len_decoder.low[pos_state], LEN_LOW_BITS); bittree_reset(coder->rep_len_decoder.mid[pos_state], LEN_MID_BITS); } bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS); bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS); coder->sequence = SEQ_IS_MATCH; coder->probs = NULL; coder->symbol = 0; coder->limit = 0; coder->offset = 0; coder->len = 0; return; } extern lzma_ret lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator, const void *opt, lzma_lz_options *lz_options) { if (lz->coder == NULL) { lz->coder = lzma_alloc(sizeof(lzma_lzma1_decoder), allocator); if (lz->coder == NULL) return LZMA_MEM_ERROR; lz->code = &lzma_decode; lz->reset = &lzma_decoder_reset; lz->set_uncompressed = &lzma_decoder_uncompressed; } // All dictionary sizes are OK here. LZ decoder will take care of // the special cases. const lzma_options_lzma *options = opt; lz_options->dict_size = options->dict_size; lz_options->preset_dict = options->preset_dict; lz_options->preset_dict_size = options->preset_dict_size; return LZMA_OK; } /// Allocate and initialize LZMA decoder. This is used only via LZ /// initialization (lzma_lzma_decoder_init() passes function pointer to /// the LZ initialization). static lzma_ret lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options) { if (!is_lclppb_valid(options)) return LZMA_PROG_ERROR; return_if_error(lzma_lzma_decoder_create( lz, allocator, options, lz_options)); lzma_decoder_reset(lz->coder, options); - lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN); + lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN, true); return LZMA_OK; } extern lzma_ret lzma_lzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters) { // LZMA can only be the last filter in the chain. This is enforced // by the raw_decoder initialization. assert(filters[1].init == NULL); return lzma_lz_decoder_init(next, allocator, filters, &lzma_decoder_init); } extern bool lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte) { if (byte > (4 * 5 + 4) * 9 + 8) return true; // See the file format specification to understand this. options->pb = byte / (9 * 5); byte -= options->pb * 9 * 5; options->lp = byte / 9; options->lc = byte - options->lp * 9; return options->lc + options->lp > LZMA_LCLP_MAX; } extern uint64_t lzma_lzma_decoder_memusage_nocheck(const void *options) { const lzma_options_lzma *const opt = options; return sizeof(lzma_lzma1_decoder) + lzma_lz_decoder_memusage(opt->dict_size); } extern uint64_t lzma_lzma_decoder_memusage(const void *options) { if (!is_lclppb_valid(options)) return UINT64_MAX; return lzma_lzma_decoder_memusage_nocheck(options); } extern lzma_ret lzma_lzma_props_decode(void **options, const lzma_allocator *allocator, const uint8_t *props, size_t props_size) { if (props_size != 5) return LZMA_OPTIONS_ERROR; lzma_options_lzma *opt = lzma_alloc(sizeof(lzma_options_lzma), allocator); if (opt == NULL) return LZMA_MEM_ERROR; if (lzma_lzma_lclppb_decode(opt, props[0])) goto error; // All dictionary sizes are accepted, including zero. LZ decoder // will automatically use a dictionary at least a few KiB even if // a smaller dictionary is requested. opt->dict_size = read32le(props + 1); opt->preset_dict = NULL; opt->preset_dict_size = 0; *options = opt; return LZMA_OK; error: lzma_free(opt, allocator); return LZMA_OPTIONS_ERROR; } diff --git a/contrib/xz/src/liblzma/lzma/lzma_encoder.c b/contrib/xz/src/liblzma/lzma/lzma_encoder.c index 07d2b87bc65b..c1552f194451 100644 --- a/contrib/xz/src/liblzma/lzma/lzma_encoder.c +++ b/contrib/xz/src/liblzma/lzma/lzma_encoder.c @@ -1,677 +1,680 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file lzma_encoder.c /// \brief LZMA encoder /// // Authors: Igor Pavlov // Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "lzma2_encoder.h" #include "lzma_encoder_private.h" #include "fastpos.h" ///////////// // Literal // ///////////// static inline void literal_matched(lzma_range_encoder *rc, probability *subcoder, uint32_t match_byte, uint32_t symbol) { uint32_t offset = 0x100; symbol += UINT32_C(1) << 8; do { match_byte <<= 1; const uint32_t match_bit = match_byte & offset; const uint32_t subcoder_index = offset + match_bit + (symbol >> 8); const uint32_t bit = (symbol >> 7) & 1; rc_bit(rc, &subcoder[subcoder_index], bit); symbol <<= 1; offset &= ~(match_byte ^ symbol); } while (symbol < (UINT32_C(1) << 16)); } static inline void literal(lzma_lzma1_encoder *coder, lzma_mf *mf, uint32_t position) { // Locate the literal byte to be encoded and the subcoder. const uint8_t cur_byte = mf->buffer[ mf->read_pos - mf->read_ahead]; probability *subcoder = literal_subcoder(coder->literal, coder->literal_context_bits, coder->literal_pos_mask, position, mf->buffer[mf->read_pos - mf->read_ahead - 1]); if (is_literal_state(coder->state)) { // Previous LZMA-symbol was a literal. Encode a normal // literal without a match byte. rc_bittree(&coder->rc, subcoder, 8, cur_byte); } else { // Previous LZMA-symbol was a match. Use the last byte of // the match as a "match byte". That is, compare the bits // of the current literal and the match byte. const uint8_t match_byte = mf->buffer[ mf->read_pos - coder->reps[0] - 1 - mf->read_ahead]; literal_matched(&coder->rc, subcoder, match_byte, cur_byte); } update_literal(coder->state); } ////////////////// // Match length // ////////////////// static void length_update_prices(lzma_length_encoder *lc, const uint32_t pos_state) { const uint32_t table_size = lc->table_size; lc->counters[pos_state] = table_size; const uint32_t a0 = rc_bit_0_price(lc->choice); const uint32_t a1 = rc_bit_1_price(lc->choice); const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2); const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2); uint32_t *const prices = lc->prices[pos_state]; uint32_t i; for (i = 0; i < table_size && i < LEN_LOW_SYMBOLS; ++i) prices[i] = a0 + rc_bittree_price(lc->low[pos_state], LEN_LOW_BITS, i); for (; i < table_size && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i) prices[i] = b0 + rc_bittree_price(lc->mid[pos_state], LEN_MID_BITS, i - LEN_LOW_SYMBOLS); for (; i < table_size; ++i) prices[i] = b1 + rc_bittree_price(lc->high, LEN_HIGH_BITS, i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS); return; } static inline void length(lzma_range_encoder *rc, lzma_length_encoder *lc, const uint32_t pos_state, uint32_t len, const bool fast_mode) { assert(len <= MATCH_LEN_MAX); len -= MATCH_LEN_MIN; if (len < LEN_LOW_SYMBOLS) { rc_bit(rc, &lc->choice, 0); rc_bittree(rc, lc->low[pos_state], LEN_LOW_BITS, len); } else { rc_bit(rc, &lc->choice, 1); len -= LEN_LOW_SYMBOLS; if (len < LEN_MID_SYMBOLS) { rc_bit(rc, &lc->choice2, 0); rc_bittree(rc, lc->mid[pos_state], LEN_MID_BITS, len); } else { rc_bit(rc, &lc->choice2, 1); len -= LEN_MID_SYMBOLS; rc_bittree(rc, lc->high, LEN_HIGH_BITS, len); } } // Only getoptimum uses the prices so don't update the table when // in fast mode. if (!fast_mode) if (--lc->counters[pos_state] == 0) length_update_prices(lc, pos_state); } /////////// // Match // /////////// static inline void match(lzma_lzma1_encoder *coder, const uint32_t pos_state, const uint32_t distance, const uint32_t len) { update_match(coder->state); length(&coder->rc, &coder->match_len_encoder, pos_state, len, coder->fast_mode); const uint32_t dist_slot = get_dist_slot(distance); const uint32_t dist_state = get_dist_state(len); rc_bittree(&coder->rc, coder->dist_slot[dist_state], DIST_SLOT_BITS, dist_slot); if (dist_slot >= DIST_MODEL_START) { const uint32_t footer_bits = (dist_slot >> 1) - 1; const uint32_t base = (2 | (dist_slot & 1)) << footer_bits; const uint32_t dist_reduced = distance - base; if (dist_slot < DIST_MODEL_END) { // Careful here: base - dist_slot - 1 can be -1, but // rc_bittree_reverse starts at probs[1], not probs[0]. rc_bittree_reverse(&coder->rc, coder->dist_special + base - dist_slot - 1, footer_bits, dist_reduced); } else { rc_direct(&coder->rc, dist_reduced >> ALIGN_BITS, footer_bits - ALIGN_BITS); rc_bittree_reverse( &coder->rc, coder->dist_align, ALIGN_BITS, dist_reduced & ALIGN_MASK); ++coder->align_price_count; } } coder->reps[3] = coder->reps[2]; coder->reps[2] = coder->reps[1]; coder->reps[1] = coder->reps[0]; coder->reps[0] = distance; ++coder->match_price_count; } //////////////////// // Repeated match // //////////////////// static inline void rep_match(lzma_lzma1_encoder *coder, const uint32_t pos_state, const uint32_t rep, const uint32_t len) { if (rep == 0) { rc_bit(&coder->rc, &coder->is_rep0[coder->state], 0); rc_bit(&coder->rc, &coder->is_rep0_long[coder->state][pos_state], len != 1); } else { const uint32_t distance = coder->reps[rep]; rc_bit(&coder->rc, &coder->is_rep0[coder->state], 1); if (rep == 1) { rc_bit(&coder->rc, &coder->is_rep1[coder->state], 0); } else { rc_bit(&coder->rc, &coder->is_rep1[coder->state], 1); rc_bit(&coder->rc, &coder->is_rep2[coder->state], rep - 2); if (rep == 3) coder->reps[3] = coder->reps[2]; coder->reps[2] = coder->reps[1]; } coder->reps[1] = coder->reps[0]; coder->reps[0] = distance; } if (len == 1) { update_short_rep(coder->state); } else { length(&coder->rc, &coder->rep_len_encoder, pos_state, len, coder->fast_mode); update_long_rep(coder->state); } } ////////// // Main // ////////// static void encode_symbol(lzma_lzma1_encoder *coder, lzma_mf *mf, uint32_t back, uint32_t len, uint32_t position) { const uint32_t pos_state = position & coder->pos_mask; if (back == UINT32_MAX) { // Literal i.e. eight-bit byte assert(len == 1); rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 0); literal(coder, mf, position); } else { // Some type of match rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1); if (back < REPS) { // It's a repeated match i.e. the same distance // has been used earlier. rc_bit(&coder->rc, &coder->is_rep[coder->state], 1); rep_match(coder, pos_state, back, len); } else { // Normal match rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); match(coder, pos_state, back - REPS, len); } } assert(mf->read_ahead >= len); mf->read_ahead -= len; } static bool encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf) { assert(mf_position(mf) == 0); if (mf->read_pos == mf->read_limit) { if (mf->action == LZMA_RUN) return false; // We cannot do anything. // We are finishing (we cannot get here when flushing). assert(mf->write_pos == mf->read_pos); assert(mf->action == LZMA_FINISH); } else { // Do the actual initialization. The first LZMA symbol must // always be a literal. mf_skip(mf, 1); mf->read_ahead = 0; rc_bit(&coder->rc, &coder->is_match[0][0], 0); rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]); } // Initialization is done (except if empty file). coder->is_initialized = true; return true; } static void encode_eopm(lzma_lzma1_encoder *coder, uint32_t position) { const uint32_t pos_state = position & coder->pos_mask; rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1); rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); match(coder, pos_state, UINT32_MAX, MATCH_LEN_MIN); } /// Number of bytes that a single encoding loop in lzma_lzma_encode() can /// consume from the dictionary. This limit comes from lzma_lzma_optimum() /// and may need to be updated if that function is significantly modified. #define LOOP_INPUT_MAX (OPTS + 1) extern lzma_ret lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, uint32_t limit) { // Initialize the stream if no data has been encoded yet. if (!coder->is_initialized && !encode_init(coder, mf)) return LZMA_OK; // Get the lowest bits of the uncompressed offset from the LZ layer. uint32_t position = mf_position(mf); while (true) { // Encode pending bits, if any. Calling this before encoding // the next symbol is needed only with plain LZMA, since // LZMA2 always provides big enough buffer to flush // everything out from the range encoder. For the same reason, // rc_encode() never returns true when this function is used // as part of LZMA2 encoder. if (rc_encode(&coder->rc, out, out_pos, out_size)) { assert(limit == UINT32_MAX); return LZMA_OK; } // With LZMA2 we need to take care that compressed size of // a chunk doesn't get too big. // FIXME? Check if this could be improved. if (limit != UINT32_MAX && (mf->read_pos - mf->read_ahead >= limit || *out_pos + rc_pending(&coder->rc) >= LZMA2_CHUNK_MAX - LOOP_INPUT_MAX)) break; // Check that there is some input to process. if (mf->read_pos >= mf->read_limit) { if (mf->action == LZMA_RUN) return LZMA_OK; if (mf->read_ahead == 0) break; } // Get optimal match (repeat position and length). // Value ranges for pos: // - [0, REPS): repeated match // - [REPS, UINT32_MAX): // match at (pos - REPS) // - UINT32_MAX: not a match but a literal // Value ranges for len: // - [MATCH_LEN_MIN, MATCH_LEN_MAX] uint32_t len; uint32_t back; if (coder->fast_mode) lzma_lzma_optimum_fast(coder, mf, &back, &len); else lzma_lzma_optimum_normal( coder, mf, &back, &len, position); encode_symbol(coder, mf, back, len, position); position += len; } if (!coder->is_flushed) { coder->is_flushed = true; // We don't support encoding plain LZMA streams without EOPM, // and LZMA2 doesn't use EOPM at LZMA level. if (limit == UINT32_MAX) encode_eopm(coder, position); // Flush the remaining bytes from the range encoder. rc_flush(&coder->rc); // Copy the remaining bytes to the output buffer. If there // isn't enough output space, we will copy out the remaining // bytes on the next call to this function by using // the rc_encode() call in the encoding loop above. if (rc_encode(&coder->rc, out, out_pos, out_size)) { assert(limit == UINT32_MAX); return LZMA_OK; } } // Make it ready for the next LZMA2 chunk. coder->is_flushed = false; return LZMA_STREAM_END; } static lzma_ret lzma_encode(void *coder, lzma_mf *restrict mf, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size) { // Plain LZMA has no support for sync-flushing. if (unlikely(mf->action == LZMA_SYNC_FLUSH)) return LZMA_OPTIONS_ERROR; return lzma_lzma_encode(coder, mf, out, out_pos, out_size, UINT32_MAX); } //////////////////// // Initialization // //////////////////// static bool is_options_valid(const lzma_options_lzma *options) { // Validate some of the options. LZ encoder validates nice_len too // but we need a valid value here earlier. return is_lclppb_valid(options) && options->nice_len >= MATCH_LEN_MIN && options->nice_len <= MATCH_LEN_MAX && (options->mode == LZMA_MODE_FAST || options->mode == LZMA_MODE_NORMAL); } static void set_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options) { // LZ encoder initialization does the validation for these so we // don't need to validate here. lz_options->before_size = OPTS; lz_options->dict_size = options->dict_size; lz_options->after_size = LOOP_INPUT_MAX; lz_options->match_len_max = MATCH_LEN_MAX; lz_options->nice_len = options->nice_len; lz_options->match_finder = options->mf; lz_options->depth = options->depth; lz_options->preset_dict = options->preset_dict; lz_options->preset_dict_size = options->preset_dict_size; return; } static void length_encoder_reset(lzma_length_encoder *lencoder, const uint32_t num_pos_states, const bool fast_mode) { bit_reset(lencoder->choice); bit_reset(lencoder->choice2); for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS); bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS); } bittree_reset(lencoder->high, LEN_HIGH_BITS); if (!fast_mode) for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) length_update_prices(lencoder, pos_state); return; } extern lzma_ret lzma_lzma_encoder_reset(lzma_lzma1_encoder *coder, const lzma_options_lzma *options) { if (!is_options_valid(options)) return LZMA_OPTIONS_ERROR; coder->pos_mask = (1U << options->pb) - 1; coder->literal_context_bits = options->lc; coder->literal_pos_mask = (1U << options->lp) - 1; // Range coder rc_reset(&coder->rc); // State coder->state = STATE_LIT_LIT; for (size_t i = 0; i < REPS; ++i) coder->reps[i] = 0; literal_init(coder->literal, options->lc, options->lp); // Bit encoders for (size_t i = 0; i < STATES; ++i) { for (size_t j = 0; j <= coder->pos_mask; ++j) { bit_reset(coder->is_match[i][j]); bit_reset(coder->is_rep0_long[i][j]); } bit_reset(coder->is_rep[i]); bit_reset(coder->is_rep0[i]); bit_reset(coder->is_rep1[i]); bit_reset(coder->is_rep2[i]); } for (size_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i) bit_reset(coder->dist_special[i]); // Bit tree encoders for (size_t i = 0; i < DIST_STATES; ++i) bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS); bittree_reset(coder->dist_align, ALIGN_BITS); // Length encoders length_encoder_reset(&coder->match_len_encoder, 1U << options->pb, coder->fast_mode); length_encoder_reset(&coder->rep_len_encoder, 1U << options->pb, coder->fast_mode); // Price counts are incremented every time appropriate probabilities // are changed. price counts are set to zero when the price tables // are updated, which is done when the appropriate price counts have // big enough value, and lzma_mf.read_ahead == 0 which happens at // least every OPTS (a few thousand) possible price count increments. // // By resetting price counts to UINT32_MAX / 2, we make sure that the // price tables will be initialized before they will be used (since // the value is definitely big enough), and that it is OK to increment // price counts without risk of integer overflow (since UINT32_MAX / 2 // is small enough). The current code doesn't increment price counts // before initializing price tables, but it maybe done in future if // we add support for saving the state between LZMA2 chunks. coder->match_price_count = UINT32_MAX / 2; coder->align_price_count = UINT32_MAX / 2; coder->opts_end_index = 0; coder->opts_current_index = 0; return LZMA_OK; } extern lzma_ret lzma_lzma_encoder_create(void **coder_ptr, const lzma_allocator *allocator, const lzma_options_lzma *options, lzma_lz_options *lz_options) { // Allocate lzma_lzma1_encoder if it wasn't already allocated. if (*coder_ptr == NULL) { *coder_ptr = lzma_alloc(sizeof(lzma_lzma1_encoder), allocator); if (*coder_ptr == NULL) return LZMA_MEM_ERROR; } lzma_lzma1_encoder *coder = *coder_ptr; // Set compression mode. We haven't validates the options yet, // but it's OK here, since nothing bad happens with invalid // options in the code below, and they will get rejected by // lzma_lzma_encoder_reset() call at the end of this function. switch (options->mode) { case LZMA_MODE_FAST: coder->fast_mode = true; break; case LZMA_MODE_NORMAL: { coder->fast_mode = false; // Set dist_table_size. // Round the dictionary size up to next 2^n. uint32_t log_size = 0; while ((UINT32_C(1) << log_size) < options->dict_size) ++log_size; coder->dist_table_size = log_size * 2; // Length encoders' price table size coder->match_len_encoder.table_size = options->nice_len + 1 - MATCH_LEN_MIN; coder->rep_len_encoder.table_size = options->nice_len + 1 - MATCH_LEN_MIN; break; } default: return LZMA_OPTIONS_ERROR; } // We don't need to write the first byte as literal if there is // a non-empty preset dictionary. encode_init() wouldn't even work // if there is a non-empty preset dictionary, because encode_init() // assumes that position is zero and previous byte is also zero. coder->is_initialized = options->preset_dict != NULL && options->preset_dict_size > 0; coder->is_flushed = false; set_lz_options(lz_options, options); return lzma_lzma_encoder_reset(coder, options); } static lzma_ret lzma_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator, const void *options, lzma_lz_options *lz_options) { lz->code = &lzma_encode; return lzma_lzma_encoder_create( &lz->coder, allocator, options, lz_options); } extern lzma_ret lzma_lzma_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const lzma_filter_info *filters) { return lzma_lz_encoder_init( next, allocator, filters, &lzma_encoder_init); } extern uint64_t lzma_lzma_encoder_memusage(const void *options) { if (!is_options_valid(options)) return UINT64_MAX; lzma_lz_options lz_options; set_lz_options(&lz_options, options); const uint64_t lz_memusage = lzma_lz_encoder_memusage(&lz_options); if (lz_memusage == UINT64_MAX) return UINT64_MAX; return (uint64_t)(sizeof(lzma_lzma1_encoder)) + lz_memusage; } extern bool lzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte) { if (!is_lclppb_valid(options)) return true; *byte = (options->pb * 5 + options->lp) * 9 + options->lc; assert(*byte <= (4 * 5 + 4) * 9 + 8); return false; } #ifdef HAVE_ENCODER_LZMA1 extern lzma_ret lzma_lzma_props_encode(const void *options, uint8_t *out) { + if (options == NULL) + return LZMA_PROG_ERROR; + const lzma_options_lzma *const opt = options; if (lzma_lzma_lclppb_encode(opt, out)) return LZMA_PROG_ERROR; write32le(out + 1, opt->dict_size); return LZMA_OK; } #endif extern LZMA_API(lzma_bool) lzma_mode_is_supported(lzma_mode mode) { return mode == LZMA_MODE_FAST || mode == LZMA_MODE_NORMAL; } diff --git a/contrib/xz/src/liblzma/validate_map.sh b/contrib/xz/src/liblzma/validate_map.sh old mode 100755 new mode 100644 index 3aee46687c38..2bf6f8b98cbb --- a/contrib/xz/src/liblzma/validate_map.sh +++ b/contrib/xz/src/liblzma/validate_map.sh @@ -1,68 +1,165 @@ #!/bin/sh ############################################################################### # -# Check liblzma.map for certain types of errors +# Check liblzma_*.map for certain types of errors. +# +# liblzma_generic.map is for FreeBSD and Solaris and possibly others +# except GNU/Linux. +# +# liblzma_linux.map is for GNU/Linux only. This and the matching extra code +# in the .c files make liblzma >= 5.2.7 compatible with binaries that were +# linked against ill-patched liblzma in RHEL/CentOS 7. By providing the +# compatibility in official XZ Utils release will hopefully prevent people +# from further copying the broken patch to other places when they want +# compatibility with binaries linked on RHEL/CentOS 7. The long version +# of the story: +# +# RHEL/CentOS 7 shipped with 5.1.2alpha, including the threaded +# encoder that is behind #ifdef LZMA_UNSTABLE in the API headers. +# In 5.1.2alpha these symbols are under XZ_5.1.2alpha in liblzma.map. +# API/ABI compatibility tracking isn't done between development +# releases so newer releases didn't have XZ_5.1.2alpha anymore. +# +# Later RHEL/CentOS 7 updated xz to 5.2.2 but they wanted to keep +# the exported symbols compatible with 5.1.2alpha. After checking +# the ABI changes it turned out that >= 5.2.0 ABI is backward +# compatible with the threaded encoder functions from 5.1.2alpha +# (but not vice versa as fixes and extensions to these functions +# were made between 5.1.2alpha and 5.2.0). +# +# In RHEL/CentOS 7, XZ Utils 5.2.2 was patched with +# xz-5.2.2-compat-libs.patch to modify liblzma.map: +# +# - XZ_5.1.2alpha was added with lzma_stream_encoder_mt and +# lzma_stream_encoder_mt_memusage. This matched XZ Utils 5.1.2alpha. +# +# - XZ_5.2 was replaced with XZ_5.2.2. It is clear that this was +# an error; the intention was to keep using XZ_5.2 (XZ_5.2.2 +# has never been used in XZ Utils). So XZ_5.2.2 lists all +# symbols that were listed under XZ_5.2 before the patch. +# lzma_stream_encoder_mt and _mt_memusage are included too so +# they are listed both here and under XZ_5.1.2alpha. +# +# The patch didn't add any __asm__(".symver ...") lines to the .c +# files. Thus the resulting liblzma.so exports the threaded encoder +# functions under XZ_5.1.2alpha only. Listing the two functions +# also under XZ_5.2.2 in liblzma.map has no effect without +# matching .symver lines. +# +# The lack of XZ_5.2 in RHEL/CentOS 7 means that binaries linked +# against unpatched XZ Utils 5.2.x won't run on RHEL/CentOS 7. +# This is unfortunate but this alone isn't too bad as the problem +# is contained within RHEL/CentOS 7 and doesn't affect users +# of other distributions. It could also be fixed internally in +# RHEL/CentOS 7. +# +# The second problem is more serious: In XZ Utils 5.2.2 the API +# headers don't have #ifdef LZMA_UNSTABLE for obvious reasons. +# This is true in RHEL/CentOS 7 version too. Thus now programs +# using new APIs can be compiled without an extra #define. However, +# the programs end up depending on symbol version XZ_5.1.2alpha +# (and possibly also XZ_5.2.2) instead of XZ_5.2 as they would +# with an unpatched XZ Utils 5.2.2. This means that such binaries +# won't run on other distributions shipping XZ Utils >= 5.2.0 as +# they don't provide XZ_5.1.2alpha or XZ_5.2.2; they only provide +# XZ_5.2 (and XZ_5.0). (This includes RHEL/CentOS 8 as the patch +# luckily isn't included there anymore with XZ Utils 5.2.4.) +# +# Binaries built by RHEL/CentOS 7 users get distributed and then +# people wonder why they don't run on some other distribution. +# Seems that people have found out about the patch and been copying +# it to some build scripts, seemingly curing the symptoms but +# actually spreading the illness further and outside RHEL/CentOS 7. +# Adding compatibility in an official XZ Utils release should work +# as a vaccine against this ill patch and stop it from spreading. +# The vaccine is kept GNU/Linux-only as other OSes should be immune +# (hopefully it hasn't spread via some build script to other OSes). # # Author: Lasse Collin # # This file has been put into the public domain. # You can do whatever you want with this file. # ############################################################################### LC_ALL=C export LC_ALL STATUS=0 cd "$(dirname "$0")" -# Get the list of symbols that aren't defined in liblzma.map. +# Get the list of symbols that aren't defined in liblzma_generic.map. SYMS=$(sed -n 's/^extern LZMA_API([^)]*) \([a-z0-9_]*\)(.*$/\1;/p' \ api/lzma/*.h \ | sort \ - | grep -Fve "$(sed '/[{}:*]/d;/^$/d;s/^ //' liblzma.map)") + | grep -Fve "$(sed '/[{}:*]/d;/^$/d;s/^ //' liblzma_generic.map)") # Check that there are no old alpha or beta versions listed. VER=$(cd ../.. && sh build-aux/version.sh) NAMES= case $VER in *alpha | *beta) NAMES=$(sed -n 's/^.*XZ_\([^ ]*\)\(alpha\|beta\) .*$/\1\2/p' \ - liblzma.map | grep -Fv "$VER") + liblzma_generic.map | grep -Fv "$VER") ;; esac # Check for duplicate lines. It can catch missing dependencies. -DUPS=$(sort liblzma.map | sed '/^$/d;/^global:$/d' | uniq -d) +DUPS=$(sort liblzma_generic.map | sed '/^$/d;/^global:$/d' | uniq -d) + +# Check that liblzma_linux.map is in sync with liblzma_generic.map. +# The RHEL/CentOS 7 compatibility symbols are in a fixed location +# so it makes it easy to remove them for comparison with liblzma_generic.map. +# +# NOTE: Putting XZ_5.2 before the compatibility symbols XZ_5.1.2alpha +# and XZ_5.2.2 in liblzma_linux.map is important: If liblzma_linux.map is +# incorrectly used without #define HAVE_SYMBOL_VERSIONS_LINUX, only the first +# occurrence of each function name will be used from liblzma_linux.map; +# the rest are ignored by the linker. Thus having XZ_5.2 before the +# compatibility symbols means that @@XZ_5.2 will be used for the symbols +# listed under XZ_5.2 {...} and the same function names later in +# the file under XZ_5.1.2alpha {...} and XZ_5.2.2 {...} will be +# ignored (@XZ_5.1.2alpha or @XZ_5.2.2 won't be added at all when +# the #define HAVE_SYMBOL_VERSIONS_LINUX isn't used). +IN_SYNC= +if ! sed '109,123d' liblzma_linux.map \ + | cmp -s - liblzma_generic.map; then + IN_SYNC=no +fi # Print error messages if needed. -if test -n "$SYMS$NAMES$DUPS"; then +if test -n "$SYMS$NAMES$DUPS$IN_SYNC"; then echo - echo 'validate_map.sh found problems from liblzma.map:' + echo 'validate_map.sh found problems from liblzma_*.map:' echo if test -n "$SYMS"; then - echo 'liblzma.map lacks the following symbols:' + echo 'liblzma_generic.map lacks the following symbols:' echo "$SYMS" echo fi if test -n "$NAMES"; then echo 'Obsolete alpha or beta version names:' echo "$NAMES" echo fi if test -n "$DUPS"; then echo 'Duplicate lines:' echo "$DUPS" echo fi + if test -n "$IN_SYNC"; then + echo "liblzma_generic.map and liblzma_linux.map aren't in sync" + echo + fi + STATUS=1 fi # Exit status is 1 if problems were found, 0 otherwise. exit "$STATUS" diff --git a/contrib/xz/src/xz/coder.c b/contrib/xz/src/xz/coder.c index 85f954393d8b..4f51e8d5c0e0 100644 --- a/contrib/xz/src/xz/coder.c +++ b/contrib/xz/src/xz/coder.c @@ -1,944 +1,977 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file coder.c /// \brief Compresses or uncompresses a file // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "private.h" /// Return value type for coder_init(). enum coder_init_ret { CODER_INIT_NORMAL, CODER_INIT_PASSTHRU, CODER_INIT_ERROR, }; enum operation_mode opt_mode = MODE_COMPRESS; enum format_type opt_format = FORMAT_AUTO; bool opt_auto_adjust = true; bool opt_single_stream = false; uint64_t opt_block_size = 0; uint64_t *opt_block_list = NULL; /// Stream used to communicate with liblzma static lzma_stream strm = LZMA_STREAM_INIT; /// Filters needed for all encoding all formats, and also decoding in raw data static lzma_filter filters[LZMA_FILTERS_MAX + 1]; /// Input and output buffers static io_buf in_buf; static io_buf out_buf; /// Number of filters. Zero indicates that we are using a preset. static uint32_t filters_count = 0; /// Number of the preset (0-9) static uint32_t preset_number = LZMA_PRESET_DEFAULT; /// Integrity check type static lzma_check check; /// This becomes false if the --check=CHECK option is used. static bool check_default = true; #if defined(HAVE_ENCODERS) && defined(MYTHREAD_ENABLED) static lzma_mt mt_options = { .flags = 0, .timeout = 300, .filters = filters, }; #endif extern void coder_set_check(lzma_check new_check) { check = new_check; check_default = false; return; } static void forget_filter_chain(void) { // Setting a preset makes us forget a possibly defined custom // filter chain. while (filters_count > 0) { --filters_count; free(filters[filters_count].options); filters[filters_count].options = NULL; } return; } extern void coder_set_preset(uint32_t new_preset) { preset_number &= ~LZMA_PRESET_LEVEL_MASK; preset_number |= new_preset; forget_filter_chain(); return; } extern void coder_set_extreme(void) { preset_number |= LZMA_PRESET_EXTREME; forget_filter_chain(); return; } extern void coder_add_filter(lzma_vli id, void *options) { if (filters_count == LZMA_FILTERS_MAX) message_fatal(_("Maximum number of filters is four")); filters[filters_count].id = id; filters[filters_count].options = options; ++filters_count; // Setting a custom filter chain makes us forget the preset options. // This makes a difference if one specifies e.g. "xz -9 --lzma2 -e" // where the custom filter chain resets the preset level back to // the default 6, making the example equivalent to "xz -6e". preset_number = LZMA_PRESET_DEFAULT; return; } static void lzma_attribute((__noreturn__)) memlimit_too_small(uint64_t memory_usage) { message(V_ERROR, _("Memory usage limit is too low for the given " "filter setup.")); message_mem_needed(V_ERROR, memory_usage); tuklib_exit(E_ERROR, E_ERROR, false); } extern void coder_set_compression_settings(void) { // The default check type is CRC64, but fallback to CRC32 // if CRC64 isn't supported by the copy of liblzma we are // using. CRC32 is always supported. if (check_default) { check = LZMA_CHECK_CRC64; if (!lzma_check_is_supported(check)) check = LZMA_CHECK_CRC32; } // Options for LZMA1 or LZMA2 in case we are using a preset. static lzma_options_lzma opt_lzma; if (filters_count == 0) { // We are using a preset. This is not a good idea in raw mode // except when playing around with things. Different versions // of this software may use different options in presets, and // thus make uncompressing the raw data difficult. if (opt_format == FORMAT_RAW) { // The message is shown only if warnings are allowed // but the exit status isn't changed. message(V_WARNING, _("Using a preset in raw mode " "is discouraged.")); message(V_WARNING, _("The exact options of the " "presets may vary between software " "versions.")); } // Get the preset for LZMA1 or LZMA2. if (lzma_lzma_preset(&opt_lzma, preset_number)) message_bug(); // Use LZMA2 except with --format=lzma we use LZMA1. filters[0].id = opt_format == FORMAT_LZMA ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; filters[0].options = &opt_lzma; filters_count = 1; } // Terminate the filter options array. filters[filters_count].id = LZMA_VLI_UNKNOWN; // If we are using the .lzma format, allow exactly one filter // which has to be LZMA1. if (opt_format == FORMAT_LZMA && (filters_count != 1 || filters[0].id != LZMA_FILTER_LZMA1)) message_fatal(_("The .lzma format supports only " "the LZMA1 filter")); // If we are using the .xz format, make sure that there is no LZMA1 // filter to prevent LZMA_PROG_ERROR. if (opt_format == FORMAT_XZ) for (size_t i = 0; i < filters_count; ++i) if (filters[i].id == LZMA_FILTER_LZMA1) message_fatal(_("LZMA1 cannot be used " "with the .xz format")); // Print the selected filter chain. message_filters_show(V_DEBUG, filters); // The --flush-timeout option requires LZMA_SYNC_FLUSH support // from the filter chain. Currently threaded encoder doesn't support // LZMA_SYNC_FLUSH so single-threaded mode must be used. if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) { for (size_t i = 0; i < filters_count; ++i) { switch (filters[i].id) { case LZMA_FILTER_LZMA2: case LZMA_FILTER_DELTA: break; default: message_fatal(_("The filter chain is " "incompatible with --flush-timeout")); } } if (hardware_threads_get() > 1) { message(V_WARNING, _("Switching to single-threaded " "mode due to --flush-timeout")); hardware_threads_set(1); } } // Get the memory usage. Note that if --format=raw was used, // we can be decompressing. const uint64_t memory_limit = hardware_memlimit_get(opt_mode); uint64_t memory_usage = UINT64_MAX; if (opt_mode == MODE_COMPRESS) { #ifdef HAVE_ENCODERS # ifdef MYTHREAD_ENABLED if (opt_format == FORMAT_XZ && hardware_threads_get() > 1) { mt_options.threads = hardware_threads_get(); mt_options.block_size = opt_block_size; mt_options.check = check; memory_usage = lzma_stream_encoder_mt_memusage( &mt_options); if (memory_usage != UINT64_MAX) message(V_DEBUG, _("Using up to %" PRIu32 " threads."), mt_options.threads); } else # endif { memory_usage = lzma_raw_encoder_memusage(filters); } #endif } else { #ifdef HAVE_DECODERS memory_usage = lzma_raw_decoder_memusage(filters); #endif } if (memory_usage == UINT64_MAX) message_fatal(_("Unsupported filter chain or filter options")); // Print memory usage info before possible dictionary // size auto-adjusting. // // NOTE: If only encoder support was built, we cannot show the // what the decoder memory usage will be. message_mem_needed(V_DEBUG, memory_usage); #ifdef HAVE_DECODERS if (opt_mode == MODE_COMPRESS) { const uint64_t decmem = lzma_raw_decoder_memusage(filters); if (decmem != UINT64_MAX) message(V_DEBUG, _("Decompression will need " "%s MiB of memory."), uint64_to_str( round_up_to_mib(decmem), 0)); } #endif if (memory_usage <= memory_limit) return; // If --no-adjust was used or we didn't find LZMA1 or // LZMA2 as the last filter, give an error immediately. // --format=raw implies --no-adjust. if (!opt_auto_adjust || opt_format == FORMAT_RAW) memlimit_too_small(memory_usage); assert(opt_mode == MODE_COMPRESS); #ifdef HAVE_ENCODERS # ifdef MYTHREAD_ENABLED if (opt_format == FORMAT_XZ && mt_options.threads > 1) { // Try to reduce the number of threads before // adjusting the compression settings down. do { // FIXME? The real single-threaded mode has // lower memory usage, but it's not comparable // because it doesn't write the size info // into Block Headers. if (--mt_options.threads == 0) memlimit_too_small(memory_usage); memory_usage = lzma_stream_encoder_mt_memusage( &mt_options); if (memory_usage == UINT64_MAX) message_bug(); } while (memory_usage > memory_limit); message(V_WARNING, _("Adjusted the number of threads " "from %s to %s to not exceed " "the memory usage limit of %s MiB"), uint64_to_str(hardware_threads_get(), 0), uint64_to_str(mt_options.threads, 1), uint64_to_str(round_up_to_mib( memory_limit), 2)); } # endif if (memory_usage <= memory_limit) return; // Look for the last filter if it is LZMA2 or LZMA1, so we can make // it use less RAM. With other filters we don't know what to do. size_t i = 0; while (filters[i].id != LZMA_FILTER_LZMA2 && filters[i].id != LZMA_FILTER_LZMA1) { if (filters[i].id == LZMA_VLI_UNKNOWN) memlimit_too_small(memory_usage); ++i; } // Decrease the dictionary size until we meet the memory // usage limit. First round down to full mebibytes. lzma_options_lzma *opt = filters[i].options; const uint32_t orig_dict_size = opt->dict_size; opt->dict_size &= ~((UINT32_C(1) << 20) - 1); while (true) { // If it is below 1 MiB, auto-adjusting failed. We could be // more sophisticated and scale it down even more, but let's // see if many complain about this version. // // FIXME: Displays the scaled memory usage instead // of the original. if (opt->dict_size < (UINT32_C(1) << 20)) memlimit_too_small(memory_usage); memory_usage = lzma_raw_encoder_memusage(filters); if (memory_usage == UINT64_MAX) message_bug(); // Accept it if it is low enough. if (memory_usage <= memory_limit) break; // Otherwise 1 MiB down and try again. I hope this // isn't too slow method for cases where the original // dict_size is very big. opt->dict_size -= UINT32_C(1) << 20; } // Tell the user that we decreased the dictionary size. message(V_WARNING, _("Adjusted LZMA%c dictionary size " "from %s MiB to %s MiB to not exceed " "the memory usage limit of %s MiB"), filters[i].id == LZMA_FILTER_LZMA2 ? '2' : '1', uint64_to_str(orig_dict_size >> 20, 0), uint64_to_str(opt->dict_size >> 20, 1), uint64_to_str(round_up_to_mib(memory_limit), 2)); #endif return; } #ifdef HAVE_DECODERS /// Return true if the data in in_buf seems to be in the .xz format. static bool is_format_xz(void) { // Specify the magic as hex to be compatible with EBCDIC systems. static const uint8_t magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 }; return strm.avail_in >= sizeof(magic) && memcmp(in_buf.u8, magic, sizeof(magic)) == 0; } /// Return true if the data in in_buf seems to be in the .lzma format. static bool is_format_lzma(void) { // The .lzma header is 13 bytes. if (strm.avail_in < 13) return false; // Decode the LZMA1 properties. lzma_filter filter = { .id = LZMA_FILTER_LZMA1 }; if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK) return false; // A hack to ditch tons of false positives: We allow only dictionary // sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone // created only files with 2^n, but accepts any dictionary size. // If someone complains, this will be reconsidered. lzma_options_lzma *opt = filter.options; const uint32_t dict_size = opt->dict_size; free(opt); if (dict_size != UINT32_MAX) { uint32_t d = dict_size - 1; d |= d >> 2; d |= d >> 3; d |= d >> 4; d |= d >> 8; d |= d >> 16; ++d; if (d != dict_size || dict_size == 0) return false; } // Another hack to ditch false positives: Assume that if the // uncompressed size is known, it must be less than 256 GiB. // Again, if someone complains, this will be reconsidered. uint64_t uncompressed_size = 0; for (size_t i = 0; i < 8; ++i) uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8); if (uncompressed_size != UINT64_MAX && uncompressed_size > (UINT64_C(1) << 38)) return false; return true; } #endif /// Detect the input file type (for now, this done only when decompressing), /// and initialize an appropriate coder. Return value indicates if a normal /// liblzma-based coder was initialized (CODER_INIT_NORMAL), if passthru /// mode should be used (CODER_INIT_PASSTHRU), or if an error occurred /// (CODER_INIT_ERROR). static enum coder_init_ret coder_init(file_pair *pair) { lzma_ret ret = LZMA_PROG_ERROR; if (opt_mode == MODE_COMPRESS) { #ifdef HAVE_ENCODERS switch (opt_format) { case FORMAT_AUTO: // args.c ensures this. assert(0); break; case FORMAT_XZ: # ifdef MYTHREAD_ENABLED if (hardware_threads_get() > 1) ret = lzma_stream_encoder_mt( &strm, &mt_options); else # endif ret = lzma_stream_encoder( &strm, filters, check); break; case FORMAT_LZMA: ret = lzma_alone_encoder(&strm, filters[0].options); break; case FORMAT_RAW: ret = lzma_raw_encoder(&strm, filters); break; } #endif } else { #ifdef HAVE_DECODERS uint32_t flags = 0; // It seems silly to warn about unsupported check if the // check won't be verified anyway due to --ignore-check. if (opt_ignore_check) flags |= LZMA_IGNORE_CHECK; else flags |= LZMA_TELL_UNSUPPORTED_CHECK; if (!opt_single_stream) flags |= LZMA_CONCATENATED; // We abuse FORMAT_AUTO to indicate unknown file format, // for which we may consider passthru mode. enum format_type init_format = FORMAT_AUTO; switch (opt_format) { case FORMAT_AUTO: if (is_format_xz()) init_format = FORMAT_XZ; else if (is_format_lzma()) init_format = FORMAT_LZMA; break; case FORMAT_XZ: if (is_format_xz()) init_format = FORMAT_XZ; break; case FORMAT_LZMA: if (is_format_lzma()) init_format = FORMAT_LZMA; break; case FORMAT_RAW: init_format = FORMAT_RAW; break; } switch (init_format) { case FORMAT_AUTO: // Unknown file format. If --decompress --stdout // --force have been given, then we copy the input // as is to stdout. Checking for MODE_DECOMPRESS // is needed, because we don't want to do use // passthru mode with --test. if (opt_mode == MODE_DECOMPRESS - && opt_stdout && opt_force) + && opt_stdout && opt_force) { + // These are needed for progress info. + strm.total_in = 0; + strm.total_out = 0; return CODER_INIT_PASSTHRU; + } ret = LZMA_FORMAT_ERROR; break; case FORMAT_XZ: ret = lzma_stream_decoder(&strm, hardware_memlimit_get( MODE_DECOMPRESS), flags); break; case FORMAT_LZMA: ret = lzma_alone_decoder(&strm, hardware_memlimit_get( MODE_DECOMPRESS)); break; case FORMAT_RAW: // Memory usage has already been checked in // coder_set_compression_settings(). ret = lzma_raw_decoder(&strm, filters); break; } // Try to decode the headers. This will catch too low // memory usage limit in case it happens in the first // Block of the first Stream, which is where it very // probably will happen if it is going to happen. + // + // This will also catch unsupported check type which + // we treat as a warning only. If there are empty + // concatenated Streams with unsupported check type then + // the message can be shown more than once here. The loop + // is used in case there is first a warning about + // unsupported check type and then the first Block + // would exceed the memlimit. if (ret == LZMA_OK && init_format != FORMAT_RAW) { strm.next_out = NULL; strm.avail_out = 0; - ret = lzma_code(&strm, LZMA_RUN); + while ((ret = lzma_code(&strm, LZMA_RUN)) + == LZMA_UNSUPPORTED_CHECK) + message_warning("%s: %s", pair->src_name, + message_strm(ret)); + + // With --single-stream lzma_code won't wait for + // LZMA_FINISH and thus it can return LZMA_STREAM_END + // if the file has no uncompressed data inside. + // So treat LZMA_STREAM_END as LZMA_OK here. + // When lzma_code() is called again in coder_normal() + // it will return LZMA_STREAM_END again. + if (ret == LZMA_STREAM_END) + ret = LZMA_OK; } #endif } if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); if (ret == LZMA_MEMLIMIT_ERROR) message_mem_needed(V_ERROR, lzma_memusage(&strm)); return CODER_INIT_ERROR; } return CODER_INIT_NORMAL; } /// Resolve conflicts between opt_block_size and opt_block_list in single /// threaded mode. We want to default to opt_block_list, except when it is /// larger than opt_block_size. If this is the case for the current Block /// at *list_pos, then we break into smaller Blocks. Otherwise advance /// to the next Block in opt_block_list, and break apart if needed. static void split_block(uint64_t *block_remaining, uint64_t *next_block_remaining, size_t *list_pos) { if (*next_block_remaining > 0) { // The Block at *list_pos has previously been split up. assert(hardware_threads_get() == 1); assert(opt_block_size > 0); assert(opt_block_list != NULL); if (*next_block_remaining > opt_block_size) { // We have to split the current Block at *list_pos // into another opt_block_size length Block. *block_remaining = opt_block_size; } else { // This is the last remaining split Block for the // Block at *list_pos. *block_remaining = *next_block_remaining; } *next_block_remaining -= *block_remaining; } else { // The Block at *list_pos has been finished. Go to the next // entry in the list. If the end of the list has been reached, // reuse the size of the last Block. if (opt_block_list[*list_pos + 1] != 0) ++*list_pos; *block_remaining = opt_block_list[*list_pos]; // If in single-threaded mode, split up the Block if needed. // This is not needed in multi-threaded mode because liblzma // will do this due to how threaded encoding works. if (hardware_threads_get() == 1 && opt_block_size > 0 && *block_remaining > opt_block_size) { *next_block_remaining = *block_remaining - opt_block_size; *block_remaining = opt_block_size; } } } static bool coder_write_output(file_pair *pair) { if (opt_mode != MODE_TEST) { if (io_write(pair, &out_buf, IO_BUFFER_SIZE - strm.avail_out)) return true; } strm.next_out = out_buf.u8; strm.avail_out = IO_BUFFER_SIZE; return false; } /// Compress or decompress using liblzma. static bool coder_normal(file_pair *pair) { // Encoder needs to know when we have given all the input to it. // The decoders need to know it too when we are using // LZMA_CONCATENATED. We need to check for src_eof here, because // the first input chunk has been already read if decompressing, // and that may have been the only chunk we will read. lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN; lzma_ret ret; // Assume that something goes wrong. bool success = false; // block_remaining indicates how many input bytes to encode before // finishing the current .xz Block. The Block size is set with // --block-size=SIZE and --block-list. They have an effect only when // compressing to the .xz format. If block_remaining == UINT64_MAX, // only a single block is created. uint64_t block_remaining = UINT64_MAX; // next_block_remaining for when we are in single-threaded mode and // the Block in --block-list is larger than the --block-size=SIZE. uint64_t next_block_remaining = 0; // Position in opt_block_list. Unused if --block-list wasn't used. size_t list_pos = 0; // Handle --block-size for single-threaded mode and the first step // of --block-list. if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) { // --block-size doesn't do anything here in threaded mode, // because the threaded encoder will take care of splitting // to fixed-sized Blocks. if (hardware_threads_get() == 1 && opt_block_size > 0) block_remaining = opt_block_size; // If --block-list was used, start with the first size. // // For threaded case, --block-size specifies how big Blocks // the encoder needs to be prepared to create at maximum // and --block-list will simultaneously cause new Blocks // to be started at specified intervals. To keep things // logical, the same is done in single-threaded mode. The // output is still not identical because in single-threaded // mode the size info isn't written into Block Headers. if (opt_block_list != NULL) { if (block_remaining < opt_block_list[list_pos]) { assert(hardware_threads_get() == 1); next_block_remaining = opt_block_list[list_pos] - block_remaining; } else { block_remaining = opt_block_list[list_pos]; } } } strm.next_out = out_buf.u8; strm.avail_out = IO_BUFFER_SIZE; while (!user_abort) { // Fill the input buffer if it is empty and we aren't // flushing or finishing. if (strm.avail_in == 0 && action == LZMA_RUN) { strm.next_in = in_buf.u8; strm.avail_in = io_read(pair, &in_buf, my_min(block_remaining, IO_BUFFER_SIZE)); if (strm.avail_in == SIZE_MAX) break; if (pair->src_eof) { action = LZMA_FINISH; } else if (block_remaining != UINT64_MAX) { // Start a new Block after every // opt_block_size bytes of input. block_remaining -= strm.avail_in; if (block_remaining == 0) action = LZMA_FULL_BARRIER; } if (action == LZMA_RUN && pair->flush_needed) action = LZMA_SYNC_FLUSH; } // Let liblzma do the actual work. ret = lzma_code(&strm, action); // Write out if the output buffer became full. if (strm.avail_out == 0) { if (coder_write_output(pair)) break; } if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH || action == LZMA_FULL_BARRIER)) { if (action == LZMA_SYNC_FLUSH) { // Flushing completed. Write the pending data // out immediately so that the reading side // can decompress everything compressed so far. if (coder_write_output(pair)) break; // Mark that we haven't seen any new input // since the previous flush. pair->src_has_seen_input = false; pair->flush_needed = false; } else { // Start a new Block after LZMA_FULL_BARRIER. if (opt_block_list == NULL) { assert(hardware_threads_get() == 1); assert(opt_block_size > 0); block_remaining = opt_block_size; } else { split_block(&block_remaining, &next_block_remaining, &list_pos); } } // Start a new Block after LZMA_FULL_FLUSH or continue // the same block after LZMA_SYNC_FLUSH. action = LZMA_RUN; } else if (ret != LZMA_OK) { // Determine if the return value indicates that we - // won't continue coding. - const bool stop = ret != LZMA_NO_CHECK - && ret != LZMA_UNSUPPORTED_CHECK; + // won't continue coding. LZMA_NO_CHECK would be + // here too if LZMA_TELL_ANY_CHECK was used. + const bool stop = ret != LZMA_UNSUPPORTED_CHECK; if (stop) { // Write the remaining bytes even if something // went wrong, because that way the user gets // as much data as possible, which can be good // when trying to get at least some useful // data out of damaged files. if (coder_write_output(pair)) break; } if (ret == LZMA_STREAM_END) { if (opt_single_stream) { io_fix_src_pos(pair, strm.avail_in); success = true; break; } // Check that there is no trailing garbage. // This is needed for LZMA_Alone and raw // streams. if (strm.avail_in == 0 && !pair->src_eof) { // Try reading one more byte. // Hopefully we don't get any more // input, and thus pair->src_eof // becomes true. strm.avail_in = io_read( pair, &in_buf, 1); if (strm.avail_in == SIZE_MAX) break; assert(strm.avail_in == 0 || strm.avail_in == 1); } if (strm.avail_in == 0) { assert(pair->src_eof); success = true; break; } // We hadn't reached the end of the file. ret = LZMA_DATA_ERROR; assert(stop); } // If we get here and stop is true, something went // wrong and we print an error. Otherwise it's just // a warning and coding can continue. if (stop) { message_error("%s: %s", pair->src_name, message_strm(ret)); } else { message_warning("%s: %s", pair->src_name, message_strm(ret)); // When compressing, all possible errors set // stop to true. assert(opt_mode != MODE_COMPRESS); } if (ret == LZMA_MEMLIMIT_ERROR) { // Display how much memory it would have // actually needed. message_mem_needed(V_ERROR, lzma_memusage(&strm)); } if (stop) break; } // Show progress information under certain conditions. message_progress_update(); } return success; } /// Copy from input file to output file without processing the data in any /// way. This is used only when trying to decompress unrecognized files /// with --decompress --stdout --force, so the output is always stdout. static bool coder_passthru(file_pair *pair) { while (strm.avail_in != 0) { if (user_abort) return false; if (io_write(pair, &in_buf, strm.avail_in)) return false; strm.total_in += strm.avail_in; strm.total_out = strm.total_in; message_progress_update(); strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); if (strm.avail_in == SIZE_MAX) return false; } return true; } extern void coder_run(const char *filename) { // Set and possibly print the filename for the progress message. message_filename(filename); // Try to open the input file. file_pair *pair = io_open_src(filename); if (pair == NULL) return; // Assume that something goes wrong. bool success = false; if (opt_mode == MODE_COMPRESS) { strm.next_in = NULL; strm.avail_in = 0; } else { // Read the first chunk of input data. This is needed // to detect the input file type. strm.next_in = in_buf.u8; strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); } if (strm.avail_in != SIZE_MAX) { // Initialize the coder. This will detect the file format // and, in decompression or testing mode, check the memory // usage of the first Block too. This way we don't try to // open the destination file if we see that coding wouldn't // work at all anyway. This also avoids deleting the old // "target" file if --force was used. const enum coder_init_ret init_ret = coder_init(pair); if (init_ret != CODER_INIT_ERROR && !user_abort) { // Don't open the destination file when --test // is used. if (opt_mode == MODE_TEST || !io_open_dest(pair)) { // Remember the current time. It is needed // for progress indicator. mytime_set_start_time(); // Initialize the progress indicator. + // + // NOTE: When reading from stdin, fstat() + // isn't called on it and thus src_st.st_size + // is zero. If stdin pointed to a regular + // file, it would still be possible to know + // the file size but then we would also need + // to take into account the current reading + // position since with stdin it isn't + // necessarily at the beginning of the file. const bool is_passthru = init_ret == CODER_INIT_PASSTHRU; const uint64_t in_size = pair->src_st.st_size <= 0 ? 0 : (uint64_t)(pair->src_st.st_size); message_progress_start(&strm, is_passthru, in_size); // Do the actual coding or passthru. if (is_passthru) success = coder_passthru(pair); else success = coder_normal(pair); message_progress_end(success); } } } // Close the file pair. It needs to know if coding was successful to // know if the source or target file should be unlinked. io_close(pair, success); return; } #ifndef NDEBUG extern void coder_free(void) { lzma_end(&strm); return; } #endif diff --git a/contrib/xz/src/xz/file_io.c b/contrib/xz/src/xz/file_io.c index 0ba8db8fbc4c..41e4c2d893f8 100644 --- a/contrib/xz/src/xz/file_io.c +++ b/contrib/xz/src/xz/file_io.c @@ -1,1321 +1,1333 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file file_io.c /// \brief File opening, unlinking, and closing // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "private.h" #include #ifdef TUKLIB_DOSLIKE # include #else # include static bool warn_fchown; #endif #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) # include #elif defined(HAVE__FUTIME) # include #elif defined(HAVE_UTIME) # include #endif #ifdef HAVE_CAPSICUM # ifdef HAVE_SYS_CAPSICUM_H # include # else # include # endif #endif #include "tuklib_open_stdxxx.h" #ifndef O_BINARY # define O_BINARY 0 #endif #ifndef O_NOCTTY # define O_NOCTTY 0 #endif // Using this macro to silence a warning from gcc -Wlogical-op. #if EAGAIN == EWOULDBLOCK # define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN) #else # define IS_EAGAIN_OR_EWOULDBLOCK(e) \ ((e) == EAGAIN || (e) == EWOULDBLOCK) #endif typedef enum { IO_WAIT_MORE, // Reading or writing is possible. IO_WAIT_ERROR, // Error or user_abort IO_WAIT_TIMEOUT, // poll() timed out } io_wait_ret; /// If true, try to create sparse files when decompressing. static bool try_sparse = true; #ifdef ENABLE_SANDBOX /// True if the conditions for sandboxing (described in main()) have been met. static bool sandbox_allowed = false; #endif #ifndef TUKLIB_DOSLIKE /// File status flags of standard input. This is used by io_open_src() /// and io_close_src(). static int stdin_flags; static bool restore_stdin_flags = false; /// Original file status flags of standard output. This is used by /// io_open_dest() and io_close_dest() to save and restore the flags. static int stdout_flags; static bool restore_stdout_flags = false; /// Self-pipe used together with the user_abort variable to avoid /// race conditions with signal handling. static int user_abort_pipe[2]; #endif static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); extern void io_init(void) { // Make sure that stdin, stdout, and stderr are connected to // a valid file descriptor. Exit immediately with exit code ERROR // if we cannot make the file descriptors valid. Maybe we should // print an error message, but our stderr could be screwed anyway. tuklib_open_stdxxx(E_ERROR); #ifndef TUKLIB_DOSLIKE // If fchown() fails setting the owner, we warn about it only if // we are root. warn_fchown = geteuid() == 0; // Create a pipe for the self-pipe trick. if (pipe(user_abort_pipe)) message_fatal(_("Error creating a pipe: %s"), strerror(errno)); // Make both ends of the pipe non-blocking. for (unsigned i = 0; i < 2; ++i) { int flags = fcntl(user_abort_pipe[i], F_GETFL); if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL, flags | O_NONBLOCK) == -1) message_fatal(_("Error creating a pipe: %s"), strerror(errno)); } #endif #ifdef __DJGPP__ // Avoid doing useless things when statting files. // This isn't important but doesn't hurt. _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; #endif return; } #ifndef TUKLIB_DOSLIKE extern void io_write_to_user_abort_pipe(void) { // If the write() fails, it's probably due to the pipe being full. // Failing in that case is fine. If the reason is something else, // there's not much we can do since this is called in a signal // handler. So ignore the errors and try to avoid warnings with // GCC and glibc when _FORTIFY_SOURCE=2 is used. uint8_t b = '\0'; const int ret = write(user_abort_pipe[1], &b, 1); (void)ret; return; } #endif extern void io_no_sparse(void) { try_sparse = false; return; } #ifdef ENABLE_SANDBOX extern void io_allow_sandbox(void) { sandbox_allowed = true; return; } /// Enables operating-system-specific sandbox if it is possible. /// src_fd is the file descriptor of the input file. static void io_sandbox_enter(int src_fd) { if (!sandbox_allowed) { // This message is more often annoying than useful so // it's commented out. It can be useful when developing // the sandboxing code. //message(V_DEBUG, _("Sandbox is disabled due " // "to incompatible command line arguments")); return; } const char dummy_str[] = "x"; // Try to ensure that both libc and xz locale files have been // loaded when NLS is enabled. snprintf(NULL, 0, "%s%s", _(dummy_str), strerror(EINVAL)); // Try to ensure that iconv data files needed for handling multibyte // characters have been loaded. This is needed at least with glibc. tuklib_mbstr_width(dummy_str, NULL); #ifdef HAVE_CAPSICUM // Capsicum needs FreeBSD 10.0 or later. cap_rights_t rights; if (cap_rights_limit(src_fd, cap_rights_init(&rights, CAP_EVENT, CAP_FCNTL, CAP_LOOKUP, CAP_READ, CAP_SEEK))) goto error; if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights, CAP_EVENT, CAP_FCNTL, CAP_FSTAT, CAP_LOOKUP, CAP_WRITE, CAP_SEEK))) goto error; if (cap_rights_limit(user_abort_pipe[0], cap_rights_init(&rights, CAP_EVENT))) goto error; if (cap_rights_limit(user_abort_pipe[1], cap_rights_init(&rights, CAP_WRITE))) goto error; if (cap_enter()) goto error; #else # error ENABLE_SANDBOX is defined but no sandboxing method was found. #endif // This message is annoying in xz -lvv. //message(V_DEBUG, _("Sandbox was successfully enabled")); return; error: message(V_DEBUG, _("Failed to enable the sandbox")); } #endif // ENABLE_SANDBOX #ifndef TUKLIB_DOSLIKE /// \brief Waits for input or output to become available or for a signal /// /// This uses the self-pipe trick to avoid a race condition that can occur /// if a signal is caught after user_abort has been checked but before e.g. /// read() has been called. In that situation read() could block unless /// non-blocking I/O is used. With non-blocking I/O something like select() /// or poll() is needed to avoid a busy-wait loop, and the same race condition /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in /// POSIX) but neither is portable enough in 2013. The self-pipe trick is /// old and very portable. static io_wait_ret io_wait(file_pair *pair, int timeout, bool is_reading) { struct pollfd pfd[2]; if (is_reading) { pfd[0].fd = pair->src_fd; pfd[0].events = POLLIN; } else { pfd[0].fd = pair->dest_fd; pfd[0].events = POLLOUT; } pfd[1].fd = user_abort_pipe[0]; pfd[1].events = POLLIN; while (true) { const int ret = poll(pfd, 2, timeout); if (user_abort) return IO_WAIT_ERROR; if (ret == -1) { if (errno == EINTR || errno == EAGAIN) continue; message_error(_("%s: poll() failed: %s"), is_reading ? pair->src_name : pair->dest_name, strerror(errno)); return IO_WAIT_ERROR; } if (ret == 0) return IO_WAIT_TIMEOUT; if (pfd[0].revents != 0) return IO_WAIT_MORE; } } #endif /// \brief Unlink a file /// /// This tries to verify that the file being unlinked really is the file that /// we want to unlink by verifying device and inode numbers. There's still /// a small unavoidable race, but this is much better than nothing (the file /// could have been moved/replaced even hours earlier). static void io_unlink(const char *name, const struct stat *known_st) { #if defined(TUKLIB_DOSLIKE) // On DOS-like systems, st_ino is meaningless, so don't bother // testing it. Just silence a compiler warning. (void)known_st; #else struct stat new_st; // If --force was used, use stat() instead of lstat(). This way // (de)compressing symlinks works correctly. However, it also means // that xz cannot detect if a regular file foo is renamed to bar // and then a symlink foo -> bar is created. Because of stat() // instead of lstat(), xz will think that foo hasn't been replaced // with another file. Thus, xz will remove foo even though it no // longer is the same file that xz used when it started compressing. // Probably it's not too bad though, so this doesn't need a more // complex fix. const int stat_ret = opt_force ? stat(name, &new_st) : lstat(name, &new_st); if (stat_ret # ifdef __VMS // st_ino is an array, and we don't want to // compare st_dev at all. || memcmp(&new_st.st_ino, &known_st->st_ino, sizeof(new_st.st_ino)) != 0 # else // Typical POSIX-like system || new_st.st_dev != known_st->st_dev || new_st.st_ino != known_st->st_ino # endif ) // TRANSLATORS: When compression or decompression finishes, // and xz is going to remove the source file, xz first checks // if the source file still exists, and if it does, does its // device and inode numbers match what xz saw when it opened // the source file. If these checks fail, this message is // shown, %s being the filename, and the file is not deleted. // The check for device and inode numbers is there, because // it is possible that the user has put a new file in place // of the original file, and in that case it obviously // shouldn't be removed. - message_error(_("%s: File seems to have been moved, " + message_warning(_("%s: File seems to have been moved, " "not removing"), name); else #endif // There's a race condition between lstat() and unlink() // but at least we have tried to avoid removing wrong file. if (unlink(name)) - message_error(_("%s: Cannot remove: %s"), + message_warning(_("%s: Cannot remove: %s"), name, strerror(errno)); return; } /// \brief Copies owner/group and permissions /// /// \todo ACL and EA support /// static void io_copy_attrs(const file_pair *pair) { // Skip chown and chmod on Windows. #ifndef TUKLIB_DOSLIKE // This function is more tricky than you may think at first. // Blindly copying permissions may permit users to access the // destination file who didn't have permission to access the // source file. // Try changing the owner of the file. If we aren't root or the owner // isn't already us, fchown() probably doesn't succeed. We warn // about failing fchown() only if we are root. if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1)) && warn_fchown) message_warning(_("%s: Cannot set the file owner: %s"), pair->dest_name, strerror(errno)); mode_t mode; - if (fchown(pair->dest_fd, (uid_t)(-1), pair->src_st.st_gid)) { + // With BSD semantics the new dest file may have a group that + // does not belong to the user. If the src file has the same gid + // nothing has to be done. Nevertheless OpenBSD fchown(2) fails + // in this case which seems to be POSIX compliant. As there is + // nothing to do, skip the system call. + if (pair->dest_st.st_gid != pair->src_st.st_gid + && fchown(pair->dest_fd, (uid_t)(-1), + pair->src_st.st_gid)) { message_warning(_("%s: Cannot set the file group: %s"), pair->dest_name, strerror(errno)); // We can still safely copy some additional permissions: // `group' must be at least as strict as `other' and // also vice versa. // // NOTE: After this, the owner of the source file may // get additional permissions. This shouldn't be too bad, // because the owner would have had permission to chmod // the original file anyway. mode = ((pair->src_st.st_mode & 0070) >> 3) & (pair->src_st.st_mode & 0007); mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; } else { // Drop the setuid, setgid, and sticky bits. mode = pair->src_st.st_mode & 0777; } if (fchmod(pair->dest_fd, mode)) message_warning(_("%s: Cannot set the file permissions: %s"), pair->dest_name, strerror(errno)); #endif // Copy the timestamps. We have several possible ways to do this, of // which some are better in both security and precision. // // First, get the nanosecond part of the timestamps. As of writing, // it's not standardized by POSIX, and there are several names for // the same thing in struct stat. long atime_nsec; long mtime_nsec; # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) // GNU and Solaris atime_nsec = pair->src_st.st_atim.tv_nsec; mtime_nsec = pair->src_st.st_mtim.tv_nsec; # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) // BSD atime_nsec = pair->src_st.st_atimespec.tv_nsec; mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) // GNU and BSD without extensions atime_nsec = pair->src_st.st_atimensec; mtime_nsec = pair->src_st.st_mtimensec; # elif defined(HAVE_STRUCT_STAT_ST_UATIME) // Tru64 atime_nsec = pair->src_st.st_uatime * 1000; mtime_nsec = pair->src_st.st_umtime * 1000; # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) // UnixWare atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; # else // Safe fallback atime_nsec = 0; mtime_nsec = 0; # endif // Construct a structure to hold the timestamps and call appropriate // function to set the timestamps. #if defined(HAVE_FUTIMENS) // Use nanosecond precision. struct timespec tv[2]; tv[0].tv_sec = pair->src_st.st_atime; tv[0].tv_nsec = atime_nsec; tv[1].tv_sec = pair->src_st.st_mtime; tv[1].tv_nsec = mtime_nsec; (void)futimens(pair->dest_fd, tv); #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) // Use microsecond precision. struct timeval tv[2]; tv[0].tv_sec = pair->src_st.st_atime; tv[0].tv_usec = atime_nsec / 1000; tv[1].tv_sec = pair->src_st.st_mtime; tv[1].tv_usec = mtime_nsec / 1000; # if defined(HAVE_FUTIMES) (void)futimes(pair->dest_fd, tv); # elif defined(HAVE_FUTIMESAT) (void)futimesat(pair->dest_fd, NULL, tv); # else // Argh, no function to use a file descriptor to set the timestamp. (void)utimes(pair->dest_name, tv); # endif #elif defined(HAVE__FUTIME) // Use one-second precision with Windows-specific _futime(). // We could use utime() too except that for some reason the // timestamp will get reset at close(). With _futime() it works. // This struct cannot be const as _futime() takes a non-const pointer. struct _utimbuf buf = { .actime = pair->src_st.st_atime, .modtime = pair->src_st.st_mtime, }; // Avoid warnings. (void)atime_nsec; (void)mtime_nsec; (void)_futime(pair->dest_fd, &buf); #elif defined(HAVE_UTIME) // Use one-second precision. utime() doesn't support using file // descriptor either. Some systems have broken utime() prototype // so don't make this const. struct utimbuf buf = { .actime = pair->src_st.st_atime, .modtime = pair->src_st.st_mtime, }; // Avoid warnings. (void)atime_nsec; (void)mtime_nsec; (void)utime(pair->dest_name, &buf); #endif return; } /// Opens the source file. Returns false on success, true on error. static bool io_open_src_real(file_pair *pair) { // There's nothing to open when reading from stdin. if (pair->src_name == stdin_filename) { pair->src_fd = STDIN_FILENO; #ifdef TUKLIB_DOSLIKE setmode(STDIN_FILENO, O_BINARY); #else // Try to set stdin to non-blocking mode. It won't work // e.g. on OpenBSD if stdout is e.g. /dev/null. In such // case we proceed as if stdin were non-blocking anyway // (in case of /dev/null it will be in practice). The // same applies to stdout in io_open_dest_real(). stdin_flags = fcntl(STDIN_FILENO, F_GETFL); if (stdin_flags == -1) { message_error(_("Error getting the file status flags " "from standard input: %s"), strerror(errno)); return true; } if ((stdin_flags & O_NONBLOCK) == 0 && fcntl(STDIN_FILENO, F_SETFL, stdin_flags | O_NONBLOCK) != -1) restore_stdin_flags = true; #endif #ifdef HAVE_POSIX_FADVISE // It will fail if stdin is a pipe and that's fine. (void)posix_fadvise(STDIN_FILENO, 0, 0, opt_mode == MODE_LIST ? POSIX_FADV_RANDOM : POSIX_FADV_SEQUENTIAL); #endif return false; } // Symlinks are not followed unless writing to stdout or --force - // was used. - const bool follow_symlinks = opt_stdout || opt_force; + // or --keep was used. + const bool follow_symlinks + = opt_stdout || opt_force || opt_keep_original; // We accept only regular files if we are writing the output // to disk too. bzip2 allows overriding this with --force but // gzip and xz don't. const bool reg_files_only = !opt_stdout; // Flags for open() int flags = O_RDONLY | O_BINARY | O_NOCTTY; #ifndef TUKLIB_DOSLIKE // Use non-blocking I/O: // - It prevents blocking when opening FIFOs and some other // special files, which is good if we want to accept only // regular files. // - It can help avoiding some race conditions with signal handling. flags |= O_NONBLOCK; #endif #if defined(O_NOFOLLOW) if (!follow_symlinks) flags |= O_NOFOLLOW; #elif !defined(TUKLIB_DOSLIKE) // Some POSIX-like systems lack O_NOFOLLOW (it's not required // by POSIX). Check for symlinks with a separate lstat() on // these systems. if (!follow_symlinks) { struct stat st; if (lstat(pair->src_name, &st)) { message_error("%s: %s", pair->src_name, strerror(errno)); return true; } else if (S_ISLNK(st.st_mode)) { message_warning(_("%s: Is a symbolic link, " "skipping"), pair->src_name); return true; } } #else // Avoid warnings. (void)follow_symlinks; #endif // Try to open the file. Signals have been blocked so EINTR shouldn't // be possible. pair->src_fd = open(pair->src_name, flags); if (pair->src_fd == -1) { // Signals (that have a signal handler) have been blocked. assert(errno != EINTR); #ifdef O_NOFOLLOW // Give an understandable error message if the reason // for failing was that the file was a symbolic link. // // Note that at least Linux, OpenBSD, Solaris, and Darwin // use ELOOP to indicate that O_NOFOLLOW was the reason // that open() failed. Because there may be // directories in the pathname, ELOOP may occur also // because of a symlink loop in the directory part. // So ELOOP doesn't tell us what actually went wrong, // and this stupidity went into POSIX-1.2008 too. // // FreeBSD associates EMLINK with O_NOFOLLOW and // Tru64 uses ENOTSUP. We use these directly here // and skip the lstat() call and the associated race. // I want to hear if there are other kernels that // fail with something else than ELOOP with O_NOFOLLOW. bool was_symlink = false; # if defined(__FreeBSD__) || defined(__DragonFly__) if (errno == EMLINK) was_symlink = true; # elif defined(__digital__) && defined(__unix__) if (errno == ENOTSUP) was_symlink = true; # elif defined(__NetBSD__) if (errno == EFTYPE) was_symlink = true; # else if (errno == ELOOP && !follow_symlinks) { const int saved_errno = errno; struct stat st; if (lstat(pair->src_name, &st) == 0 && S_ISLNK(st.st_mode)) was_symlink = true; errno = saved_errno; } # endif if (was_symlink) message_warning(_("%s: Is a symbolic link, " "skipping"), pair->src_name); else #endif // Something else than O_NOFOLLOW failing // (assuming that the race conditions didn't // confuse us). message_error("%s: %s", pair->src_name, strerror(errno)); return true; } // Stat the source file. We need the result also when we copy // the permissions, and when unlinking. // // NOTE: Use stat() instead of fstat() with DJGPP, because // then we have a better chance to get st_ino value that can // be used in io_open_dest_real() to prevent overwriting the // source file. #ifdef __DJGPP__ if (stat(pair->src_name, &pair->src_st)) goto error_msg; #else if (fstat(pair->src_fd, &pair->src_st)) goto error_msg; #endif if (S_ISDIR(pair->src_st.st_mode)) { message_warning(_("%s: Is a directory, skipping"), pair->src_name); goto error; } if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) { message_warning(_("%s: Not a regular file, skipping"), pair->src_name); goto error; } #ifndef TUKLIB_DOSLIKE - if (reg_files_only && !opt_force) { + if (reg_files_only && !opt_force && !opt_keep_original) { if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { // gzip rejects setuid and setgid files even // when --force was used. bzip2 doesn't check // for them, but calls fchown() after fchmod(), // and many systems automatically drop setuid // and setgid bits there. // // We accept setuid and setgid files if - // --force was used. We drop these bits + // --force or --keep was used. We drop these bits // explicitly in io_copy_attr(). message_warning(_("%s: File has setuid or " "setgid bit set, skipping"), pair->src_name); goto error; } if (pair->src_st.st_mode & S_ISVTX) { message_warning(_("%s: File has sticky bit " "set, skipping"), pair->src_name); goto error; } if (pair->src_st.st_nlink > 1) { message_warning(_("%s: Input file has more " "than one hard link, " "skipping"), pair->src_name); goto error; } } // If it is something else than a regular file, wait until // there is input available. This way reading from FIFOs // will work when open() is used with O_NONBLOCK. if (!S_ISREG(pair->src_st.st_mode)) { signals_unblock(); const io_wait_ret ret = io_wait(pair, -1, true); signals_block(); if (ret != IO_WAIT_MORE) goto error; } #endif #ifdef HAVE_POSIX_FADVISE // It will fail with some special files like FIFOs but that is fine. (void)posix_fadvise(pair->src_fd, 0, 0, opt_mode == MODE_LIST ? POSIX_FADV_RANDOM : POSIX_FADV_SEQUENTIAL); #endif return false; error_msg: message_error("%s: %s", pair->src_name, strerror(errno)); error: (void)close(pair->src_fd); return true; } extern file_pair * io_open_src(const char *src_name) { if (is_empty_filename(src_name)) return NULL; // Since we have only one file open at a time, we can use // a statically allocated structure. static file_pair pair; + // This implicitly also initializes src_st.st_size to zero + // which is expected to be <= 0 by default. fstat() isn't + // called when reading from standard input but src_st.st_size + // is still read. pair = (file_pair){ .src_name = src_name, .dest_name = NULL, .src_fd = -1, .dest_fd = -1, .src_eof = false, .src_has_seen_input = false, .flush_needed = false, .dest_try_sparse = false, .dest_pending_sparse = 0, }; // Block the signals, for which we have a custom signal handler, so // that we don't need to worry about EINTR. signals_block(); const bool error = io_open_src_real(&pair); signals_unblock(); #ifdef ENABLE_SANDBOX if (!error) io_sandbox_enter(pair.src_fd); #endif return error ? NULL : &pair; } /// \brief Closes source file of the file_pair structure /// /// \param pair File whose src_fd should be closed /// \param success If true, the file will be removed from the disk if /// closing succeeds and --keep hasn't been used. static void io_close_src(file_pair *pair, bool success) { #ifndef TUKLIB_DOSLIKE if (restore_stdin_flags) { assert(pair->src_fd == STDIN_FILENO); restore_stdin_flags = false; if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1) message_error(_("Error restoring the status flags " "to standard input: %s"), strerror(errno)); } #endif if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { // Close the file before possibly unlinking it. On DOS-like // systems this is always required since unlinking will fail // if the file is open. On POSIX systems it usually works // to unlink open files, but in some cases it doesn't and // one gets EBUSY in errno. // // xz 5.2.2 and older unlinked the file before closing it // (except on DOS-like systems). The old code didn't handle // EBUSY and could fail e.g. on some CIFS shares. The // advantage of unlinking before closing is negligible // (avoids a race between close() and stat()/lstat() and // unlink()), so let's keep this simple. (void)close(pair->src_fd); if (success && !opt_keep_original) io_unlink(pair->src_name, &pair->src_st); } return; } static bool io_open_dest_real(file_pair *pair) { if (opt_stdout || pair->src_fd == STDIN_FILENO) { // We don't modify or free() this. pair->dest_name = (char *)"(stdout)"; pair->dest_fd = STDOUT_FILENO; #ifdef TUKLIB_DOSLIKE setmode(STDOUT_FILENO, O_BINARY); #else // Try to set O_NONBLOCK if it isn't already set. // If it fails, we assume that stdout is non-blocking // in practice. See the comments in io_open_src_real() // for similar situation with stdin. // // NOTE: O_APPEND may be unset later in this function // and it relies on stdout_flags being set here. stdout_flags = fcntl(STDOUT_FILENO, F_GETFL); if (stdout_flags == -1) { message_error(_("Error getting the file status flags " "from standard output: %s"), strerror(errno)); return true; } if ((stdout_flags & O_NONBLOCK) == 0 && fcntl(STDOUT_FILENO, F_SETFL, stdout_flags | O_NONBLOCK) != -1) restore_stdout_flags = true; #endif } else { pair->dest_name = suffix_get_dest_name(pair->src_name); if (pair->dest_name == NULL) return true; #ifdef __DJGPP__ struct stat st; if (stat(pair->dest_name, &st) == 0) { // Check that it isn't a special file like "prn". if (st.st_dev == -1) { message_error("%s: Refusing to write to " "a DOS special file", pair->dest_name); free(pair->dest_name); return true; } // Check that we aren't overwriting the source file. if (st.st_dev == pair->src_st.st_dev && st.st_ino == pair->src_st.st_ino) { message_error("%s: Output file is the same " "as the input file", pair->dest_name); free(pair->dest_name); return true; } } #endif // If --force was used, unlink the target file first. if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { message_error(_("%s: Cannot remove: %s"), pair->dest_name, strerror(errno)); free(pair->dest_name); return true; } // Open the file. int flags = O_WRONLY | O_BINARY | O_NOCTTY | O_CREAT | O_EXCL; #ifndef TUKLIB_DOSLIKE flags |= O_NONBLOCK; #endif const mode_t mode = S_IRUSR | S_IWUSR; pair->dest_fd = open(pair->dest_name, flags, mode); if (pair->dest_fd == -1) { message_error("%s: %s", pair->dest_name, strerror(errno)); free(pair->dest_name); return true; } } #ifndef TUKLIB_DOSLIKE // dest_st isn't used on DOS-like systems except as a dummy // argument to io_unlink(), so don't fstat() on such systems. if (fstat(pair->dest_fd, &pair->dest_st)) { // If fstat() really fails, we have a safe fallback here. # if defined(__VMS) pair->dest_st.st_ino[0] = 0; pair->dest_st.st_ino[1] = 0; pair->dest_st.st_ino[2] = 0; # else pair->dest_st.st_dev = 0; pair->dest_st.st_ino = 0; # endif } else if (try_sparse && opt_mode == MODE_DECOMPRESS) { // When writing to standard output, we need to be extra // careful: // - It may be connected to something else than // a regular file. // - We aren't necessarily writing to a new empty file // or to the end of an existing file. // - O_APPEND may be active. // // TODO: I'm keeping this disabled for DOS-like systems // for now. FAT doesn't support sparse files, but NTFS // does, so maybe this should be enabled on Windows after // some testing. if (pair->dest_fd == STDOUT_FILENO) { if (!S_ISREG(pair->dest_st.st_mode)) return false; if (stdout_flags & O_APPEND) { // Creating a sparse file is not possible // when O_APPEND is active (it's used by // shell's >> redirection). As I understand // it, it is safe to temporarily disable // O_APPEND in xz, because if someone // happened to write to the same file at the // same time, results would be bad anyway // (users shouldn't assume that xz uses any // specific block size when writing data). // // The write position may be something else // than the end of the file, so we must fix // it to start writing at the end of the file // to imitate O_APPEND. if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) return false; // Construct the new file status flags. // If O_NONBLOCK was set earlier in this // function, it must be kept here too. int flags = stdout_flags & ~O_APPEND; if (restore_stdout_flags) flags |= O_NONBLOCK; // If this fcntl() fails, we continue but won't // try to create sparse output. The original // flags will still be restored if needed (to // unset O_NONBLOCK) when the file is finished. if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1) return false; // Disabling O_APPEND succeeded. Mark // that the flags should be restored // in io_close_dest(). (This may have already // been set when enabling O_NONBLOCK.) restore_stdout_flags = true; } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR) != pair->dest_st.st_size) { // Writing won't start exactly at the end // of the file. We cannot use sparse output, // because it would probably corrupt the file. return false; } } pair->dest_try_sparse = true; } #endif return false; } extern bool io_open_dest(file_pair *pair) { signals_block(); const bool ret = io_open_dest_real(pair); signals_unblock(); return ret; } /// \brief Closes destination file of the file_pair structure /// /// \param pair File whose dest_fd should be closed /// \param success If false, the file will be removed from the disk. /// /// \return Zero if closing succeeds. On error, -1 is returned and /// error message printed. static bool io_close_dest(file_pair *pair, bool success) { #ifndef TUKLIB_DOSLIKE // If io_open_dest() has disabled O_APPEND, restore it here. if (restore_stdout_flags) { assert(pair->dest_fd == STDOUT_FILENO); restore_stdout_flags = false; if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) { message_error(_("Error restoring the O_APPEND flag " "to standard output: %s"), strerror(errno)); return true; } } #endif if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) return false; if (close(pair->dest_fd)) { message_error(_("%s: Closing the file failed: %s"), pair->dest_name, strerror(errno)); // Closing destination file failed, so we cannot trust its // contents. Get rid of junk: io_unlink(pair->dest_name, &pair->dest_st); free(pair->dest_name); return true; } // If the operation using this file wasn't successful, we git rid // of the junk file. if (!success) io_unlink(pair->dest_name, &pair->dest_st); free(pair->dest_name); return false; } extern void io_close(file_pair *pair, bool success) { // Take care of sparseness at the end of the output file. if (success && pair->dest_try_sparse && pair->dest_pending_sparse > 0) { // Seek forward one byte less than the size of the pending // hole, then write one zero-byte. This way the file grows // to its correct size. An alternative would be to use // ftruncate() but that isn't portable enough (e.g. it // doesn't work with FAT on Linux; FAT isn't that important // since it doesn't support sparse files anyway, but we don't // want to create corrupt files on it). if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1, SEEK_CUR) == -1) { message_error(_("%s: Seeking failed when trying " "to create a sparse file: %s"), pair->dest_name, strerror(errno)); success = false; } else { const uint8_t zero[1] = { '\0' }; if (io_write_buf(pair, zero, 1)) success = false; } } signals_block(); // Copy the file attributes. We need to skip this if destination // file isn't open or it is standard output. if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO) io_copy_attrs(pair); // Close the destination first. If it fails, we must not remove // the source file! if (io_close_dest(pair, success)) success = false; // Close the source file, and unlink it if the operation using this // file pair was successful and we haven't requested to keep the // source file. io_close_src(pair, success); signals_unblock(); return; } extern void io_fix_src_pos(file_pair *pair, size_t rewind_size) { assert(rewind_size <= IO_BUFFER_SIZE); if (rewind_size > 0) { // This doesn't need to work on unseekable file descriptors, // so just ignore possible errors. (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR); } return; } extern size_t io_read(file_pair *pair, io_buf *buf, size_t size) { // We use small buffers here. assert(size < SSIZE_MAX); size_t pos = 0; while (pos < size) { const ssize_t amount = read( pair->src_fd, buf->u8 + pos, size - pos); if (amount == 0) { pair->src_eof = true; break; } if (amount == -1) { if (errno == EINTR) { if (user_abort) return SIZE_MAX; continue; } #ifndef TUKLIB_DOSLIKE if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) { // Disable the flush-timeout if no input has // been seen since the previous flush and thus // there would be nothing to flush after the // timeout expires (avoids busy waiting). const int timeout = pair->src_has_seen_input ? mytime_get_flush_timeout() : -1; switch (io_wait(pair, timeout, true)) { case IO_WAIT_MORE: continue; case IO_WAIT_ERROR: return SIZE_MAX; case IO_WAIT_TIMEOUT: pair->flush_needed = true; return pos; default: message_bug(); } } #endif message_error(_("%s: Read error: %s"), pair->src_name, strerror(errno)); return SIZE_MAX; } pos += (size_t)(amount); if (!pair->src_has_seen_input) { pair->src_has_seen_input = true; mytime_set_flush_time(); } } return pos; } extern bool io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos) { // Using lseek() and read() is more portable than pread() and // for us it is as good as real pread(). if (lseek(pair->src_fd, pos, SEEK_SET) != pos) { message_error(_("%s: Error seeking the file: %s"), pair->src_name, strerror(errno)); return true; } const size_t amount = io_read(pair, buf, size); if (amount == SIZE_MAX) return true; if (amount != size) { message_error(_("%s: Unexpected end of file"), pair->src_name); return true; } return false; } static bool is_sparse(const io_buf *buf) { assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0); for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i) if (buf->u64[i] != 0) return false; return true; } static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size) { assert(size < SSIZE_MAX); while (size > 0) { const ssize_t amount = write(pair->dest_fd, buf, size); if (amount == -1) { if (errno == EINTR) { if (user_abort) return true; continue; } #ifndef TUKLIB_DOSLIKE if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) { if (io_wait(pair, -1, false) == IO_WAIT_MORE) continue; return true; } #endif // Handle broken pipe specially. gzip and bzip2 // don't print anything on SIGPIPE. In addition, // gzip --quiet uses exit status 2 (warning) on // broken pipe instead of whatever raise(SIGPIPE) // would make it return. It is there to hide "Broken // pipe" message on some old shells (probably old // GNU bash). // // We don't do anything special with --quiet, which // is what bzip2 does too. If we get SIGPIPE, we // will handle it like other signals by setting // user_abort, and get EPIPE here. if (errno != EPIPE) message_error(_("%s: Write error: %s"), pair->dest_name, strerror(errno)); return true; } buf += (size_t)(amount); size -= (size_t)(amount); } return false; } extern bool io_write(file_pair *pair, const io_buf *buf, size_t size) { assert(size <= IO_BUFFER_SIZE); if (pair->dest_try_sparse) { // Check if the block is sparse (contains only zeros). If it // sparse, we just store the amount and return. We will take // care of actually skipping over the hole when we hit the // next data block or close the file. // // Since io_close() requires that dest_pending_sparse > 0 // if the file ends with sparse block, we must also return // if size == 0 to avoid doing the lseek(). if (size == IO_BUFFER_SIZE) { // Even if the block was sparse, treat it as non-sparse // if the pending sparse amount is large compared to // the size of off_t. In practice this only matters // on 32-bit systems where off_t isn't always 64 bits. const off_t pending_max = (off_t)(1) << (sizeof(off_t) * CHAR_BIT - 2); if (is_sparse(buf) && pair->dest_pending_sparse < pending_max) { pair->dest_pending_sparse += (off_t)(size); return false; } } else if (size == 0) { return false; } // This is not a sparse block. If we have a pending hole, // skip it now. if (pair->dest_pending_sparse > 0) { if (lseek(pair->dest_fd, pair->dest_pending_sparse, SEEK_CUR) == -1) { message_error(_("%s: Seeking failed when " "trying to create a sparse " "file: %s"), pair->dest_name, strerror(errno)); return true; } pair->dest_pending_sparse = 0; } } return io_write_buf(pair, buf->u8, size); } diff --git a/contrib/xz/src/xz/hardware.c b/contrib/xz/src/xz/hardware.c index e746cf91a76f..da3ed3f1cd11 100644 --- a/contrib/xz/src/xz/hardware.c +++ b/contrib/xz/src/xz/hardware.c @@ -1,180 +1,186 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file hardware.c /// \brief Detection of available hardware resources // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "private.h" /// Maximum number of worker threads. This can be set with /// the --threads=NUM command line option. static uint32_t threads_max = 1; /// Memory usage limit for compression static uint64_t memlimit_compress; /// Memory usage limit for decompression static uint64_t memlimit_decompress; /// Total amount of physical RAM static uint64_t total_ram; extern void hardware_threads_set(uint32_t n) { if (n == 0) { // Automatic number of threads was requested. // If threading support was enabled at build time, // use the number of available CPU cores. Otherwise // use one thread since disabling threading support // omits lzma_cputhreads() from liblzma. #ifdef MYTHREAD_ENABLED threads_max = lzma_cputhreads(); if (threads_max == 0) threads_max = 1; #else threads_max = 1; #endif } else { threads_max = n; } return; } extern uint32_t hardware_threads_get(void) { return threads_max; } extern void hardware_memlimit_set(uint64_t new_memlimit, bool set_compress, bool set_decompress, bool is_percentage) { if (is_percentage) { assert(new_memlimit > 0); assert(new_memlimit <= 100); new_memlimit = (uint32_t)new_memlimit * total_ram / 100; } if (set_compress) { memlimit_compress = new_memlimit; #if SIZE_MAX == UINT32_MAX // FIXME? // // When running a 32-bit xz on a system with a lot of RAM and // using a percentage-based memory limit, the result can be // bigger than the 32-bit address space. Limiting the limit // below SIZE_MAX for compression (not decompression) makes // xz lower the compression settings (or number of threads) // to a level that *might* work. In practice it has worked // when using a 64-bit kernel that gives full 4 GiB address // space to 32-bit programs. In other situations this might // still be too high, like 32-bit kernels that may give much // less than 4 GiB to a single application. // // So this is an ugly hack but I will keep it here while // it does more good than bad. // // Use a value less than SIZE_MAX so that there's some room // for the xz program and so on. Don't use 4000 MiB because // it could look like someone mixed up base-2 and base-10. +#ifdef __mips__ + // For MIPS32, due to architectural pecularities, + // the limit is even lower. + const uint64_t limit_max = UINT64_C(2000) << 20; +#else const uint64_t limit_max = UINT64_C(4020) << 20; +#endif // UINT64_MAX is a special case for the string "max" so // that has to be handled specially. if (memlimit_compress != UINT64_MAX && memlimit_compress > limit_max) memlimit_compress = limit_max; #endif } if (set_decompress) memlimit_decompress = new_memlimit; return; } extern uint64_t hardware_memlimit_get(enum operation_mode mode) { // Zero is a special value that indicates the default. Currently // the default simply disables the limit. Once there is threading // support, this might be a little more complex, because there will // probably be a special case where a user asks for "optimal" number // of threads instead of a specific number (this might even become // the default mode). Each thread may use a significant amount of // memory. When there are no memory usage limits set, we need some // default soft limit for calculating the "optimal" number of // threads. const uint64_t memlimit = mode == MODE_COMPRESS ? memlimit_compress : memlimit_decompress; return memlimit != 0 ? memlimit : UINT64_MAX; } /// Helper for hardware_memlimit_show() to print one human-readable info line. static void memlimit_show(const char *str, uint64_t value) { // The memory usage limit is considered to be disabled if value // is 0 or UINT64_MAX. This might get a bit more complex once there // is threading support. See the comment in hardware_memlimit_get(). if (value == 0 || value == UINT64_MAX) printf("%s %s\n", str, _("Disabled")); else printf("%s %s MiB (%s B)\n", str, uint64_to_str(round_up_to_mib(value), 0), uint64_to_str(value, 1)); return; } extern void hardware_memlimit_show(void) { if (opt_robot) { printf("%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\n", total_ram, memlimit_compress, memlimit_decompress); } else { // TRANSLATORS: Test with "xz --info-memory" to see if // the alignment looks nice. memlimit_show(_("Total amount of physical memory (RAM): "), total_ram); memlimit_show(_("Memory usage limit for compression: "), memlimit_compress); memlimit_show(_("Memory usage limit for decompression: "), memlimit_decompress); } tuklib_exit(E_SUCCESS, E_ERROR, message_verbosity_get() != V_SILENT); } extern void hardware_init(void) { // Get the amount of RAM. If we cannot determine it, // use the assumption defined by the configure script. total_ram = lzma_physmem(); if (total_ram == 0) total_ram = (uint64_t)(ASSUME_RAM) * 1024 * 1024; // Set the defaults. hardware_memlimit_set(0, true, true, false); return; } diff --git a/contrib/xz/src/xz/message.c b/contrib/xz/src/xz/message.c index aa915d2d60d1..da90420fb897 100644 --- a/contrib/xz/src/xz/message.c +++ b/contrib/xz/src/xz/message.c @@ -1,1272 +1,1272 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file message.c /// \brief Printing messages // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "private.h" #include /// Number of the current file static unsigned int files_pos = 0; /// Total number of input files; zero if unknown. static unsigned int files_total; /// Verbosity level static enum message_verbosity verbosity = V_WARNING; /// Filename which we will print with the verbose messages static const char *filename; /// True once the a filename has been printed to stderr as part of progress /// message. If automatic progress updating isn't enabled, this becomes true /// after the first progress message has been printed due to user sending /// SIGINFO, SIGUSR1, or SIGALRM. Once this variable is true, we will print /// an empty line before the next filename to make the output more readable. static bool first_filename_printed = false; /// This is set to true when we have printed the current filename to stderr /// as part of a progress message. This variable is useful only if not /// updating progress automatically: if user sends many SIGINFO, SIGUSR1, or /// SIGALRM signals, we won't print the name of the same file multiple times. static bool current_filename_printed = false; /// True if we should print progress indicator and update it automatically /// if also verbose >= V_VERBOSE. static bool progress_automatic; /// True if message_progress_start() has been called but /// message_progress_end() hasn't been called yet. static bool progress_started = false; /// This is true when a progress message was printed and the cursor is still /// on the same line with the progress message. In that case, a newline has /// to be printed before any error messages. static bool progress_active = false; /// Pointer to lzma_stream used to do the encoding or decoding. static lzma_stream *progress_strm; /// This is true if we are in passthru mode (not actually compressing or /// decompressing) and thus cannot use lzma_get_progress(progress_strm, ...). /// That is, we are using coder_passthru() in coder.c. static bool progress_is_from_passthru; /// Expected size of the input stream is needed to show completion percentage /// and estimate remaining time. static uint64_t expected_in_size; // Use alarm() and SIGALRM when they are supported. This has two minor // advantages over the alternative of polling gettimeofday(): // - It is possible for the user to send SIGINFO, SIGUSR1, or SIGALRM to // get intermediate progress information even when --verbose wasn't used // or stderr is not a terminal. // - alarm() + SIGALRM seems to have slightly less overhead than polling // gettimeofday(). #ifdef SIGALRM const int message_progress_sigs[] = { SIGALRM, #ifdef SIGINFO SIGINFO, #endif #ifdef SIGUSR1 SIGUSR1, #endif 0 }; /// The signal handler for SIGALRM sets this to true. It is set back to false /// once the progress message has been updated. static volatile sig_atomic_t progress_needs_updating = false; /// Signal handler for SIGALRM static void progress_signal_handler(int sig lzma_attribute((__unused__))) { progress_needs_updating = true; return; } #else /// This is true when progress message printing is wanted. Using the same /// variable name as above to avoid some ifdefs. static bool progress_needs_updating = false; /// Elapsed time when the next progress message update should be done. static uint64_t progress_next_update; #endif extern void message_init(void) { // If --verbose is used, we use a progress indicator if and only // if stderr is a terminal. If stderr is not a terminal, we print // verbose information only after finishing the file. As a special // exception, even if --verbose was not used, user can send SIGALRM // to make us print progress information once without automatic // updating. progress_automatic = isatty(STDERR_FILENO); // Commented out because COLUMNS is rarely exported to environment. // Most users have at least 80 columns anyway, let's think something // fancy here if enough people complain. /* if (progress_automatic) { // stderr is a terminal. Check the COLUMNS environment // variable to see if the terminal is wide enough. If COLUMNS // doesn't exist or it has some unparsable value, we assume // that the terminal is wide enough. const char *columns_str = getenv("COLUMNS"); if (columns_str != NULL) { char *endptr; const long columns = strtol(columns_str, &endptr, 10); if (*endptr != '\0' || columns < 80) progress_automatic = false; } } */ #ifdef SIGALRM // Establish the signal handlers which set a flag to tell us that // progress info should be updated. struct sigaction sa; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; sa.sa_handler = &progress_signal_handler; for (size_t i = 0; message_progress_sigs[i] != 0; ++i) if (sigaction(message_progress_sigs[i], &sa, NULL)) message_signal_handler(); #endif return; } extern void message_verbosity_increase(void) { if (verbosity < V_DEBUG) ++verbosity; return; } extern void message_verbosity_decrease(void) { if (verbosity > V_SILENT) --verbosity; return; } extern enum message_verbosity message_verbosity_get(void) { return verbosity; } extern void message_set_files(unsigned int files) { files_total = files; return; } /// Prints the name of the current file if it hasn't been printed already, /// except if we are processing exactly one stream from stdin to stdout. /// I think it looks nicer to not print "(stdin)" when --verbose is used /// in a pipe and no other files are processed. static void print_filename(void) { if (!opt_robot && (files_total != 1 || filename != stdin_filename)) { signals_block(); FILE *file = opt_mode == MODE_LIST ? stdout : stderr; // If a file was already processed, put an empty line // before the next filename to improve readability. if (first_filename_printed) fputc('\n', file); first_filename_printed = true; current_filename_printed = true; // If we don't know how many files there will be due // to usage of --files or --files0. if (files_total == 0) fprintf(file, "%s (%u)\n", filename, files_pos); else fprintf(file, "%s (%u/%u)\n", filename, files_pos, files_total); signals_unblock(); } return; } extern void message_filename(const char *src_name) { // Start numbering the files starting from one. ++files_pos; filename = src_name; if (verbosity >= V_VERBOSE && (progress_automatic || opt_mode == MODE_LIST)) print_filename(); else current_filename_printed = false; return; } extern void message_progress_start(lzma_stream *strm, bool is_passthru, uint64_t in_size) { // Store the pointer to the lzma_stream used to do the coding. // It is needed to find out the position in the stream. progress_strm = strm; progress_is_from_passthru = is_passthru; // Store the expected size of the file. If we aren't printing any // statistics, then is will be unused. But since it is possible // that the user sends us a signal to show statistics, we need // to have it available anyway. expected_in_size = in_size; // Indicate that progress info may need to be printed before // printing error messages. progress_started = true; // If progress indicator is wanted, print the filename and possibly // the file count now. if (verbosity >= V_VERBOSE && progress_automatic) { // Start the timer to display the first progress message // after one second. An alternative would be to show the // first message almost immediately, but delaying by one // second looks better to me, since extremely early // progress info is pretty much useless. #ifdef SIGALRM // First disable a possibly existing alarm. alarm(0); progress_needs_updating = false; alarm(1); #else progress_needs_updating = true; progress_next_update = 1000; #endif } return; } /// Make the string indicating completion percentage. static const char * progress_percentage(uint64_t in_pos) { // If the size of the input file is unknown or the size told us is // clearly wrong since we have processed more data than the alleged // size of the file, show a static string indicating that we have // no idea of the completion percentage. if (expected_in_size == 0 || in_pos > expected_in_size) return "--- %"; // Never show 100.0 % before we actually are finished. double percentage = (double)(in_pos) / (double)(expected_in_size) * 99.9; // Use big enough buffer to hold e.g. a multibyte decimal point. static char buf[16]; snprintf(buf, sizeof(buf), "%.1f %%", percentage); return buf; } /// Make the string containing the amount of input processed, amount of /// output produced, and the compression ratio. static const char * progress_sizes(uint64_t compressed_pos, uint64_t uncompressed_pos, bool final) { // Use big enough buffer to hold e.g. a multibyte thousand separators. static char buf[128]; char *pos = buf; size_t left = sizeof(buf); // Print the sizes. If this the final message, use more reasonable // units than MiB if the file was small. const enum nicestr_unit unit_min = final ? NICESTR_B : NICESTR_MIB; my_snprintf(&pos, &left, "%s / %s", uint64_to_nicestr(compressed_pos, unit_min, NICESTR_TIB, false, 0), uint64_to_nicestr(uncompressed_pos, unit_min, NICESTR_TIB, false, 1)); // Avoid division by zero. If we cannot calculate the ratio, set // it to some nice number greater than 10.0 so that it gets caught // in the next if-clause. const double ratio = uncompressed_pos > 0 ? (double)(compressed_pos) / (double)(uncompressed_pos) : 16.0; // If the ratio is very bad, just indicate that it is greater than // 9.999. This way the length of the ratio field stays fixed. if (ratio > 9.999) snprintf(pos, left, " > %.3f", 9.999); else snprintf(pos, left, " = %.3f", ratio); return buf; } /// Make the string containing the processing speed of uncompressed data. static const char * progress_speed(uint64_t uncompressed_pos, uint64_t elapsed) { // Don't print the speed immediately, since the early values look // somewhat random. if (elapsed < 3000) return ""; - static const char unit[][8] = { - "KiB/s", - "MiB/s", - "GiB/s", - }; + // The first character of KiB/s, MiB/s, or GiB/s: + static const char unit[] = { 'K', 'M', 'G' }; size_t unit_index = 0; // Calculate the speed as KiB/s. double speed = (double)(uncompressed_pos) / ((double)(elapsed) * (1024.0 / 1000.0)); // Adjust the unit of the speed if needed. while (speed > 999.0) { speed /= 1024.0; if (++unit_index == ARRAY_SIZE(unit)) return ""; // Way too fast ;-) } // Use decimal point only if the number is small. Examples: // - 0.1 KiB/s // - 9.9 KiB/s // - 99 KiB/s // - 999 KiB/s // Use big enough buffer to hold e.g. a multibyte decimal point. static char buf[16]; - snprintf(buf, sizeof(buf), "%.*f %s", + snprintf(buf, sizeof(buf), "%.*f %ciB/s", speed > 9.9 ? 0 : 1, speed, unit[unit_index]); return buf; } /// Make a string indicating elapsed time. The format is either /// M:SS or H:MM:SS depending on if the time is an hour or more. static const char * progress_time(uint64_t mseconds) { // 9999 hours = 416 days static char buf[sizeof("9999:59:59")]; // 32-bit variable is enough for elapsed time (136 years). uint32_t seconds = (uint32_t)(mseconds / 1000); // Don't show anything if the time is zero or ridiculously big. if (seconds == 0 || seconds > ((9999 * 60) + 59) * 60 + 59) return ""; uint32_t minutes = seconds / 60; seconds %= 60; if (minutes >= 60) { const uint32_t hours = minutes / 60; minutes %= 60; snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32 ":%02" PRIu32, hours, minutes, seconds); } else { snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32, minutes, seconds); } return buf; } /// Return a string containing estimated remaining time when /// reasonably possible. static const char * progress_remaining(uint64_t in_pos, uint64_t elapsed) { // Don't show the estimated remaining time when it wouldn't // make sense: // - Input size is unknown. // - Input has grown bigger since we started (de)compressing. // - We haven't processed much data yet, so estimate would be // too inaccurate. // - Only a few seconds has passed since we started (de)compressing, // so estimate would be too inaccurate. if (expected_in_size == 0 || in_pos > expected_in_size || in_pos < (UINT64_C(1) << 19) || elapsed < 8000) return ""; // Calculate the estimate. Don't give an estimate of zero seconds, // since it is possible that all the input has been already passed // to the library, but there is still quite a bit of output pending. uint32_t remaining = (uint32_t)((double)(expected_in_size - in_pos) * ((double)(elapsed) / 1000.0) / (double)(in_pos)); if (remaining < 1) remaining = 1; static char buf[sizeof("9 h 55 min")]; // Select appropriate precision for the estimated remaining time. if (remaining <= 10) { // A maximum of 10 seconds remaining. // Show the number of seconds as is. snprintf(buf, sizeof(buf), "%" PRIu32 " s", remaining); } else if (remaining <= 50) { // A maximum of 50 seconds remaining. // Round up to the next multiple of five seconds. remaining = (remaining + 4) / 5 * 5; snprintf(buf, sizeof(buf), "%" PRIu32 " s", remaining); } else if (remaining <= 590) { // A maximum of 9 minutes and 50 seconds remaining. // Round up to the next multiple of ten seconds. remaining = (remaining + 9) / 10 * 10; snprintf(buf, sizeof(buf), "%" PRIu32 " min %" PRIu32 " s", remaining / 60, remaining % 60); } else if (remaining <= 59 * 60) { // A maximum of 59 minutes remaining. // Round up to the next multiple of a minute. remaining = (remaining + 59) / 60; snprintf(buf, sizeof(buf), "%" PRIu32 " min", remaining); } else if (remaining <= 9 * 3600 + 50 * 60) { // A maximum of 9 hours and 50 minutes left. // Round up to the next multiple of ten minutes. remaining = (remaining + 599) / 600 * 10; snprintf(buf, sizeof(buf), "%" PRIu32 " h %" PRIu32 " min", remaining / 60, remaining % 60); } else if (remaining <= 23 * 3600) { // A maximum of 23 hours remaining. // Round up to the next multiple of an hour. remaining = (remaining + 3599) / 3600; snprintf(buf, sizeof(buf), "%" PRIu32 " h", remaining); } else if (remaining <= 9 * 24 * 3600 + 23 * 3600) { // A maximum of 9 days and 23 hours remaining. // Round up to the next multiple of an hour. remaining = (remaining + 3599) / 3600; snprintf(buf, sizeof(buf), "%" PRIu32 " d %" PRIu32 " h", remaining / 24, remaining % 24); } else if (remaining <= 999 * 24 * 3600) { // A maximum of 999 days remaining. ;-) // Round up to the next multiple of a day. remaining = (remaining + 24 * 3600 - 1) / (24 * 3600); snprintf(buf, sizeof(buf), "%" PRIu32 " d", remaining); } else { // The estimated remaining time is too big. Don't show it. return ""; } return buf; } /// Get how much uncompressed and compressed data has been processed. static void progress_pos(uint64_t *in_pos, uint64_t *compressed_pos, uint64_t *uncompressed_pos) { uint64_t out_pos; if (progress_is_from_passthru) { // In passthru mode the progress info is in total_in/out but // the *progress_strm itself isn't initialized and thus we // cannot use lzma_get_progress(). *in_pos = progress_strm->total_in; out_pos = progress_strm->total_out; } else { lzma_get_progress(progress_strm, in_pos, &out_pos); } // It cannot have processed more input than it has been given. assert(*in_pos <= progress_strm->total_in); // It cannot have produced more output than it claims to have ready. assert(out_pos >= progress_strm->total_out); if (opt_mode == MODE_COMPRESS) { *compressed_pos = out_pos; *uncompressed_pos = *in_pos; } else { *compressed_pos = *in_pos; *uncompressed_pos = out_pos; } return; } extern void message_progress_update(void) { if (!progress_needs_updating) return; // Calculate how long we have been processing this file. const uint64_t elapsed = mytime_get_elapsed(); #ifndef SIGALRM if (progress_next_update > elapsed) return; progress_next_update = elapsed + 1000; #endif // Get our current position in the stream. uint64_t in_pos; uint64_t compressed_pos; uint64_t uncompressed_pos; progress_pos(&in_pos, &compressed_pos, &uncompressed_pos); // Block signals so that fprintf() doesn't get interrupted. signals_block(); // Print the filename if it hasn't been printed yet. if (!current_filename_printed) print_filename(); // Print the actual progress message. The idea is that there is at // least three spaces between the fields in typical situations, but // even in rare situations there is at least one space. const char *cols[5] = { progress_percentage(in_pos), progress_sizes(compressed_pos, uncompressed_pos, false), progress_speed(uncompressed_pos, elapsed), progress_time(elapsed), progress_remaining(in_pos, elapsed), }; fprintf(stderr, "\r %*s %*s %*s %10s %10s\r", tuklib_mbstr_fw(cols[0], 6), cols[0], tuklib_mbstr_fw(cols[1], 35), cols[1], tuklib_mbstr_fw(cols[2], 9), cols[2], cols[3], cols[4]); #ifdef SIGALRM // Updating the progress info was finished. Reset // progress_needs_updating to wait for the next SIGALRM. // // NOTE: This has to be done before alarm(1) or with (very) bad // luck we could be setting this to false after the alarm has already // been triggered. progress_needs_updating = false; if (verbosity >= V_VERBOSE && progress_automatic) { // Mark that the progress indicator is active, so if an error // occurs, the error message gets printed cleanly. progress_active = true; // Restart the timer so that progress_needs_updating gets // set to true after about one second. alarm(1); } else { // The progress message was printed because user had sent us // SIGALRM. In this case, each progress message is printed // on its own line. fputc('\n', stderr); } #else // When SIGALRM isn't supported and we get here, it's always due to // automatic progress update. We set progress_active here too like // described above. assert(verbosity >= V_VERBOSE); assert(progress_automatic); progress_active = true; #endif signals_unblock(); return; } static void progress_flush(bool finished) { if (!progress_started || verbosity < V_VERBOSE) return; uint64_t in_pos; uint64_t compressed_pos; uint64_t uncompressed_pos; progress_pos(&in_pos, &compressed_pos, &uncompressed_pos); // Avoid printing intermediate progress info if some error occurs // in the beginning of the stream. (If something goes wrong later in // the stream, it is sometimes useful to tell the user where the // error approximately occurred, especially if the error occurs // after a time-consuming operation.) if (!finished && !progress_active && (compressed_pos == 0 || uncompressed_pos == 0)) return; progress_active = false; const uint64_t elapsed = mytime_get_elapsed(); signals_block(); // When using the auto-updating progress indicator, the final // statistics are printed in the same format as the progress // indicator itself. if (progress_automatic) { const char *cols[5] = { finished ? "100 %" : progress_percentage(in_pos), progress_sizes(compressed_pos, uncompressed_pos, true), progress_speed(uncompressed_pos, elapsed), progress_time(elapsed), finished ? "" : progress_remaining(in_pos, elapsed), }; fprintf(stderr, "\r %*s %*s %*s %10s %10s\n", tuklib_mbstr_fw(cols[0], 6), cols[0], tuklib_mbstr_fw(cols[1], 35), cols[1], tuklib_mbstr_fw(cols[2], 9), cols[2], cols[3], cols[4]); } else { // The filename is always printed. fprintf(stderr, "%s: ", filename); // Percentage is printed only if we didn't finish yet. if (!finished) { // Don't print the percentage when it isn't known // (starts with a dash). const char *percentage = progress_percentage(in_pos); if (percentage[0] != '-') fprintf(stderr, "%s, ", percentage); } // Size information is always printed. fprintf(stderr, "%s", progress_sizes( compressed_pos, uncompressed_pos, true)); // The speed and elapsed time aren't always shown. const char *speed = progress_speed(uncompressed_pos, elapsed); if (speed[0] != '\0') fprintf(stderr, ", %s", speed); const char *elapsed_str = progress_time(elapsed); if (elapsed_str[0] != '\0') fprintf(stderr, ", %s", elapsed_str); fputc('\n', stderr); } signals_unblock(); return; } extern void message_progress_end(bool success) { assert(progress_started); progress_flush(success); progress_started = false; return; } static void vmessage(enum message_verbosity v, const char *fmt, va_list ap) { if (v <= verbosity) { signals_block(); progress_flush(false); // TRANSLATORS: This is the program name in the beginning // of the line in messages. Usually it becomes "xz: ". // This is a translatable string because French needs // a space before a colon. fprintf(stderr, _("%s: "), progname); vfprintf(stderr, fmt, ap); fputc('\n', stderr); signals_unblock(); } return; } extern void message(enum message_verbosity v, const char *fmt, ...) { va_list ap; va_start(ap, fmt); vmessage(v, fmt, ap); va_end(ap); return; } extern void message_warning(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vmessage(V_WARNING, fmt, ap); va_end(ap); set_exit_status(E_WARNING); return; } extern void message_error(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vmessage(V_ERROR, fmt, ap); va_end(ap); set_exit_status(E_ERROR); return; } extern void message_fatal(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vmessage(V_ERROR, fmt, ap); va_end(ap); tuklib_exit(E_ERROR, E_ERROR, false); } extern void message_bug(void) { message_fatal(_("Internal error (bug)")); } extern void message_signal_handler(void) { message_fatal(_("Cannot establish signal handlers")); } extern const char * message_strm(lzma_ret code) { switch (code) { case LZMA_NO_CHECK: return _("No integrity check; not verifying file integrity"); case LZMA_UNSUPPORTED_CHECK: return _("Unsupported type of integrity check; " "not verifying file integrity"); case LZMA_MEM_ERROR: return strerror(ENOMEM); case LZMA_MEMLIMIT_ERROR: return _("Memory usage limit reached"); case LZMA_FORMAT_ERROR: return _("File format not recognized"); case LZMA_OPTIONS_ERROR: return _("Unsupported options"); case LZMA_DATA_ERROR: return _("Compressed data is corrupt"); case LZMA_BUF_ERROR: return _("Unexpected end of input"); case LZMA_OK: case LZMA_STREAM_END: case LZMA_GET_CHECK: case LZMA_PROG_ERROR: // Without "default", compiler will warn if new constants // are added to lzma_ret, it is not too easy to forget to // add the new constants to this function. break; } return _("Internal error (bug)"); } extern void message_mem_needed(enum message_verbosity v, uint64_t memusage) { if (v > verbosity) return; // Convert memusage to MiB, rounding up to the next full MiB. // This way the user can always use the displayed usage as // the new memory usage limit. (If we rounded to the nearest, // the user might need to +1 MiB to get high enough limit.) memusage = round_up_to_mib(memusage); uint64_t memlimit = hardware_memlimit_get(opt_mode); // Handle the case when there is no memory usage limit. // This way we don't print a weird message with a huge number. if (memlimit == UINT64_MAX) { message(v, _("%s MiB of memory is required. " "The limiter is disabled."), uint64_to_str(memusage, 0)); return; } // With US-ASCII: // 2^64 with thousand separators + " MiB" suffix + '\0' = 26 + 4 + 1 // But there may be multibyte chars so reserve enough space. char memlimitstr[128]; // Show the memory usage limit as MiB unless it is less than 1 MiB. // This way it's easy to notice errors where one has typed // --memory=123 instead of --memory=123MiB. if (memlimit < (UINT32_C(1) << 20)) { snprintf(memlimitstr, sizeof(memlimitstr), "%s B", uint64_to_str(memlimit, 1)); } else { // Round up just like with memusage. If this function is // called for informational purposes (to just show the // current usage and limit), we should never show that // the usage is higher than the limit, which would give // a false impression that the memory usage limit isn't // properly enforced. snprintf(memlimitstr, sizeof(memlimitstr), "%s MiB", uint64_to_str(round_up_to_mib(memlimit), 1)); } message(v, _("%s MiB of memory is required. The limit is %s."), uint64_to_str(memusage, 0), memlimitstr); return; } /// \brief Convert uint32_t to a nice string for --lzma[12]=dict=SIZE /// /// The idea is to use KiB or MiB suffix when possible. static const char * uint32_to_optstr(uint32_t num) { static char buf[16]; if ((num & ((UINT32_C(1) << 20) - 1)) == 0) snprintf(buf, sizeof(buf), "%" PRIu32 "MiB", num >> 20); else if ((num & ((UINT32_C(1) << 10) - 1)) == 0) snprintf(buf, sizeof(buf), "%" PRIu32 "KiB", num >> 10); else snprintf(buf, sizeof(buf), "%" PRIu32, num); return buf; } extern void message_filters_to_str(char buf[FILTERS_STR_SIZE], const lzma_filter *filters, bool all_known) { char *pos = buf; size_t left = FILTERS_STR_SIZE; for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { // Add the dashes for the filter option. A space is // needed after the first and later filters. my_snprintf(&pos, &left, "%s", i == 0 ? "--" : " --"); switch (filters[i].id) { case LZMA_FILTER_LZMA1: case LZMA_FILTER_LZMA2: { const lzma_options_lzma *opt = filters[i].options; const char *mode = NULL; const char *mf = NULL; if (all_known) { switch (opt->mode) { case LZMA_MODE_FAST: mode = "fast"; break; case LZMA_MODE_NORMAL: mode = "normal"; break; default: mode = "UNKNOWN"; break; } switch (opt->mf) { case LZMA_MF_HC3: mf = "hc3"; break; case LZMA_MF_HC4: mf = "hc4"; break; case LZMA_MF_BT2: mf = "bt2"; break; case LZMA_MF_BT3: mf = "bt3"; break; case LZMA_MF_BT4: mf = "bt4"; break; default: mf = "UNKNOWN"; break; } } // Add the filter name and dictionary size, which // is always known. my_snprintf(&pos, &left, "lzma%c=dict=%s", filters[i].id == LZMA_FILTER_LZMA2 ? '2' : '1', uint32_to_optstr(opt->dict_size)); // With LZMA1 also lc/lp/pb are known when // decompressing, but this function is never // used to print information about .lzma headers. assert(filters[i].id == LZMA_FILTER_LZMA2 || all_known); // Print the rest of the options, which are known // only when compressing. if (all_known) my_snprintf(&pos, &left, ",lc=%" PRIu32 ",lp=%" PRIu32 ",pb=%" PRIu32 ",mode=%s,nice=%" PRIu32 ",mf=%s" ",depth=%" PRIu32, opt->lc, opt->lp, opt->pb, mode, opt->nice_len, mf, opt->depth); break; } case LZMA_FILTER_X86: case LZMA_FILTER_POWERPC: case LZMA_FILTER_IA64: case LZMA_FILTER_ARM: case LZMA_FILTER_ARMTHUMB: case LZMA_FILTER_SPARC: { static const char bcj_names[][9] = { "x86", "powerpc", "ia64", "arm", "armthumb", "sparc", }; const lzma_options_bcj *opt = filters[i].options; my_snprintf(&pos, &left, "%s", bcj_names[filters[i].id - LZMA_FILTER_X86]); // Show the start offset only when really needed. if (opt != NULL && opt->start_offset != 0) my_snprintf(&pos, &left, "=start=%" PRIu32, opt->start_offset); break; } case LZMA_FILTER_DELTA: { const lzma_options_delta *opt = filters[i].options; my_snprintf(&pos, &left, "delta=dist=%" PRIu32, opt->dist); break; } default: // This should be possible only if liblzma is // newer than the xz tool. my_snprintf(&pos, &left, "UNKNOWN"); break; } } return; } extern void message_filters_show(enum message_verbosity v, const lzma_filter *filters) { if (v > verbosity) return; char buf[FILTERS_STR_SIZE]; message_filters_to_str(buf, filters, true); fprintf(stderr, _("%s: Filter chain: %s\n"), progname, buf); return; } extern void message_try_help(void) { // Print this with V_WARNING instead of V_ERROR to prevent it from // showing up when --quiet has been specified. message(V_WARNING, _("Try `%s --help' for more information."), progname); return; } extern void message_version(void) { // It is possible that liblzma version is different than the command // line tool version, so print both. if (opt_robot) { printf("XZ_VERSION=%" PRIu32 "\nLIBLZMA_VERSION=%" PRIu32 "\n", LZMA_VERSION, lzma_version_number()); } else { printf("xz (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"); printf("liblzma %s\n", lzma_version_string()); } tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); } extern void message_help(bool long_help) { printf(_("Usage: %s [OPTION]... [FILE]...\n" "Compress or decompress FILEs in the .xz format.\n\n"), progname); // NOTE: The short help doesn't currently have options that // take arguments. if (long_help) puts(_("Mandatory arguments to long options are mandatory " "for short options too.\n")); if (long_help) puts(_(" Operation mode:\n")); puts(_( " -z, --compress force compression\n" " -d, --decompress force decompression\n" " -t, --test test compressed file integrity\n" " -l, --list list information about .xz files")); if (long_help) puts(_("\n Operation modifiers:\n")); puts(_( " -k, --keep keep (don't delete) input files\n" " -f, --force force overwrite of output file and (de)compress links\n" " -c, --stdout write to standard output and don't delete input files")); + // NOTE: --to-stdout isn't included above because it's not + // the recommended spelling. It was copied from gzip but other + // compressors with gzip-like syntax don't support it. if (long_help) { puts(_( " --single-stream decompress only the first stream, and silently\n" " ignore possible remaining input data")); puts(_( " --no-sparse do not create sparse files when decompressing\n" " -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n" " --files[=FILE] read filenames to process from FILE; if FILE is\n" " omitted, filenames are read from the standard input;\n" " filenames must be terminated with the newline character\n" " --files0[=FILE] like --files but use the null character as terminator")); } if (long_help) { puts(_("\n Basic file format and compression options:\n")); puts(_( " -F, --format=FMT file format to encode or decode; possible values are\n" " `auto' (default), `xz', `lzma', and `raw'\n" " -C, --check=CHECK integrity check type: `none' (use with caution),\n" " `crc32', `crc64' (default), or `sha256'")); puts(_( " --ignore-check don't verify the integrity check when decompressing")); } puts(_( " -0 ... -9 compression preset; default is 6; take compressor *and*\n" " decompressor memory usage into account before using 7-9!")); puts(_( " -e, --extreme try to improve compression ratio by using more CPU time;\n" " does not affect decompressor memory requirements")); puts(_( " -T, --threads=NUM use at most NUM threads; the default is 1; set to 0\n" " to use as many threads as there are processor cores")); if (long_help) { puts(_( " --block-size=SIZE\n" " start a new .xz block after every SIZE bytes of input;\n" " use this to set the block size for threaded compression")); puts(_( " --block-list=SIZES\n" " start a new .xz block after the given comma-separated\n" " intervals of uncompressed data")); puts(_( " --flush-timeout=TIMEOUT\n" " when compressing, if more than TIMEOUT milliseconds has\n" " passed since the previous flush and reading more input\n" " would block, all pending data is flushed out" )); puts(_( // xgettext:no-c-format " --memlimit-compress=LIMIT\n" " --memlimit-decompress=LIMIT\n" " -M, --memlimit=LIMIT\n" " set memory usage limit for compression, decompression,\n" " or both; LIMIT is in bytes, % of RAM, or 0 for defaults")); puts(_( " --no-adjust if compression settings exceed the memory usage limit,\n" " give an error instead of adjusting the settings downwards")); } if (long_help) { puts(_( "\n Custom filter chain for compression (alternative for using presets):")); #if defined(HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) \ || defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) // TRANSLATORS: The word "literal" in "literal context bits" // means how many "context bits" to use when encoding // literals. A literal is a single 8-bit byte. It doesn't // mean "literally" here. puts(_( "\n" " --lzma1[=OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero or\n" " --lzma2[=OPTS] more of the following options (valid values; default):\n" " preset=PRE reset options to a preset (0-9[e])\n" " dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n" " lc=NUM number of literal context bits (0-4; 3)\n" " lp=NUM number of literal position bits (0-4; 0)\n" " pb=NUM number of position bits (0-4; 2)\n" " mode=MODE compression mode (fast, normal; normal)\n" " nice=NUM nice length of a match (2-273; 64)\n" " mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n" " depth=NUM maximum search depth; 0=automatic (default)")); #endif puts(_( "\n" " --x86[=OPTS] x86 BCJ filter (32-bit and 64-bit)\n" " --powerpc[=OPTS] PowerPC BCJ filter (big endian only)\n" " --ia64[=OPTS] IA-64 (Itanium) BCJ filter\n" " --arm[=OPTS] ARM BCJ filter (little endian only)\n" " --armthumb[=OPTS] ARM-Thumb BCJ filter (little endian only)\n" " --sparc[=OPTS] SPARC BCJ filter\n" " Valid OPTS for all BCJ filters:\n" " start=NUM start offset for conversions (default=0)")); #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) puts(_( "\n" " --delta[=OPTS] Delta filter; valid OPTS (valid values; default):\n" " dist=NUM distance between bytes being subtracted\n" " from each other (1-256; 1)")); #endif } if (long_help) puts(_("\n Other options:\n")); puts(_( " -q, --quiet suppress warnings; specify twice to suppress errors too\n" " -v, --verbose be verbose; specify twice for even more verbose")); if (long_help) { puts(_( " -Q, --no-warn make warnings not affect the exit status")); puts(_( " --robot use machine-parsable messages (useful for scripts)")); puts(""); puts(_( " --info-memory display the total amount of RAM and the currently active\n" " memory usage limits, and exit")); puts(_( " -h, --help display the short help (lists only the basic options)\n" " -H, --long-help display this long help and exit")); } else { puts(_( " -h, --help display this short help and exit\n" " -H, --long-help display the long help (lists also the advanced options)")); } puts(_( " -V, --version display the version number and exit")); puts(_("\nWith no FILE, or when FILE is -, read standard input.\n")); // TRANSLATORS: This message indicates the bug reporting address // for this package. Please add _another line_ saying // "Report translation bugs to <...>\n" with the email or WWW // address for translation bugs. Thanks. printf(_("Report bugs to <%s> (in English or Finnish).\n"), PACKAGE_BUGREPORT); printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); #if LZMA_VERSION_STABILITY != LZMA_VERSION_STABILITY_STABLE puts(_( "THIS IS A DEVELOPMENT VERSION NOT INTENDED FOR PRODUCTION USE.")); #endif tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); } diff --git a/contrib/xz/src/xz/xz.1 b/contrib/xz/src/xz/xz.1 index 540d1364dccb..066b13a213eb 100644 --- a/contrib/xz/src/xz/xz.1 +++ b/contrib/xz/src/xz/xz.1 @@ -1,2805 +1,2837 @@ '\" t .\" .\" Author: Lasse Collin .\" .\" This file has been put into the public domain. .\" You can do whatever you want with this file. .\" -.TH XZ 1 "2020-02-01" "Tukaani" "XZ Utils" +.TH XZ 1 "2022-10-25" "Tukaani" "XZ Utils" . .SH NAME xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files . .SH SYNOPSIS .B xz .RI [ option... ] .RI [ file... ] . .SH COMMAND ALIASES .B unxz is equivalent to .BR "xz \-\-decompress" . .br .B xzcat is equivalent to .BR "xz \-\-decompress \-\-stdout" . .br .B lzma is equivalent to .BR "xz \-\-format=lzma" . .br .B unlzma is equivalent to .BR "xz \-\-format=lzma \-\-decompress" . .br .B lzcat is equivalent to .BR "xz \-\-format=lzma \-\-decompress \-\-stdout" . .PP When writing scripts that need to decompress files, it is recommended to always use the name .B xz with appropriate arguments .RB ( "xz \-d" or .BR "xz \-dc" ) instead of the names .B unxz and .BR xzcat . . .SH DESCRIPTION .B xz is a general-purpose data compression tool with command line syntax similar to .BR gzip (1) and .BR bzip2 (1). The native file format is the .B .xz format, but the legacy .B .lzma format used by LZMA Utils and raw compressed streams with no container format headers are also supported. .PP .B xz compresses or decompresses each .I file according to the selected operation mode. If no .I files are given or .I file is .BR \- , .B xz reads from standard input and writes the processed data to standard output. .B xz will refuse (display an error and skip the .IR file ) to write compressed data to standard output if it is a terminal. Similarly, .B xz will refuse to read compressed data from standard input if it is a terminal. .PP Unless .B \-\-stdout is specified, .I files other than .B \- are written to a new file whose name is derived from the source .I file name: .IP \(bu 3 When compressing, the suffix of the target file format .RB ( .xz or .BR .lzma ) is appended to the source filename to get the target filename. .IP \(bu 3 When decompressing, the .B .xz or .B .lzma suffix is removed from the filename to get the target filename. .B xz also recognizes the suffixes .B .txz and .BR .tlz , and replaces them with the .B .tar suffix. .PP If the target file already exists, an error is displayed and the .I file is skipped. .PP Unless writing to standard output, .B xz will display a warning and skip the .I file if any of the following applies: .IP \(bu 3 .I File is not a regular file. Symbolic links are not followed, and thus they are not considered to be regular files. .IP \(bu 3 .I File has more than one hard link. .IP \(bu 3 .I File has setuid, setgid, or sticky bit set. .IP \(bu 3 The operation mode is set to compress and the .I file already has a suffix of the target file format .RB ( .xz or .B .txz when compressing to the .B .xz format, and .B .lzma or .B .tlz when compressing to the .B .lzma format). .IP \(bu 3 The operation mode is set to decompress and the .I file doesn't have a suffix of any of the supported file formats .RB ( .xz , .BR .txz , .BR .lzma , or .BR .tlz ). .PP After successfully compressing or decompressing the .IR file , .B xz copies the owner, group, permissions, access time, and modification time from the source .I file to the target file. If copying the group fails, the permissions are modified so that the target file doesn't become accessible to users who didn't have permission to access the source .IR file . .B xz doesn't support copying other metadata like access control lists or extended attributes yet. .PP Once the target file has been successfully closed, the source .I file is removed unless .B \-\-keep was specified. The source .I file -is never removed if the output is written to standard output. +is never removed if the output is written to standard output +or if an error occurs. .PP Sending .B SIGINFO or .B SIGUSR1 to the .B xz process makes it print progress information to standard error. This has only limited use since when standard error is a terminal, using .B \-\-verbose will display an automatically updating progress indicator. . .SS "Memory usage" The memory usage of .B xz varies from a few hundred kilobytes to several gigabytes depending on the compression settings. The settings used when compressing a file determine the memory requirements of the decompressor. Typically the decompressor needs 5\ % to 20\ % of the amount of memory that the compressor needed when creating the file. For example, decompressing a file created with .B xz \-9 currently requires 65\ MiB of memory. Still, it is possible to have .B .xz files that require several gigabytes of memory to decompress. .PP Especially users of older systems may find the possibility of very large memory usage annoying. To prevent uncomfortable surprises, .B xz has a built-in memory usage limiter, which is disabled by default. While some operating systems provide ways to limit the memory usage of processes, relying on it -wasn't deemed to be flexible enough (e.g. using +wasn't deemed to be flexible enough (for example, using .BR ulimit (1) to limit virtual memory tends to cripple .BR mmap (2)). .PP The memory usage limiter can be enabled with the command line option \fB\-\-memlimit=\fIlimit\fR. Often it is more convenient to enable the limiter by default by setting the environment variable .BR XZ_DEFAULTS , -e.g.\& +for example, .BR XZ_DEFAULTS=\-\-memlimit=150MiB . It is possible to set the limits separately -for compression and decompression -by using \fB\-\-memlimit\-compress=\fIlimit\fR and -\fB\-\-memlimit\-decompress=\fIlimit\fR. +for compression and decompression by using +.BI \-\-memlimit\-compress= limit +and \fB\-\-memlimit\-decompress=\fIlimit\fR. Using these two options outside .B XZ_DEFAULTS is rarely useful because a single run of .B xz cannot do both compression and decompression and .BI \-\-memlimit= limit -(or \fB\-M\fR \fIlimit\fR) +(or +.B \-M +.IR limit ) is shorter to type on the command line. .PP If the specified memory usage limit is exceeded when decompressing, .B xz will display an error and decompressing the file will fail. If the limit is exceeded when compressing, .B xz will try to scale the settings down so that the limit -is no longer exceeded (except when using \fB\-\-format=raw\fR -or \fB\-\-no\-adjust\fR). +is no longer exceeded (except when using +.B \-\-format=raw +or +.BR \-\-no\-adjust ). This way the operation won't fail unless the limit is very small. The scaling of the settings is done in steps that don't -match the compression level presets, e.g. if the limit is +match the compression level presets, for example, if the limit is only slightly less than the amount required for .BR "xz \-9" , the settings will be scaled down only a little, not all the way down to .BR "xz \-8" . . .SS "Concatenation and padding with .xz files" It is possible to concatenate .B .xz files as is. .B xz will decompress such files as if they were a single .B .xz file. .PP It is possible to insert padding between the concatenated parts or after the last part. The padding must consist of null bytes and the size of the padding must be a multiple of four bytes. -This can be useful e.g. if the +This can be useful, for example, if the .B .xz file is stored on a medium that measures file sizes in 512-byte blocks. .PP Concatenation and padding are not allowed with .B .lzma files or raw streams. . .SH OPTIONS . .SS "Integer suffixes and special values" In most places where an integer argument is expected, an optional suffix is supported to easily indicate large integers. There must be no space between the integer and the suffix. .TP .B KiB Multiply the integer by 1,024 (2^10). .BR Ki , .BR k , .BR kB , .BR K , and .B KB are accepted as synonyms for .BR KiB . .TP .B MiB Multiply the integer by 1,048,576 (2^20). .BR Mi , .BR m , .BR M , and .B MB are accepted as synonyms for .BR MiB . .TP .B GiB Multiply the integer by 1,073,741,824 (2^30). .BR Gi , .BR g , .BR G , and .B GB are accepted as synonyms for .BR GiB . .PP The special value .B max can be used to indicate the maximum integer value supported by the option. . .SS "Operation mode" If multiple operation mode options are given, the last one takes effect. .TP .BR \-z ", " \-\-compress Compress. This is the default operation mode when no operation mode option is specified and no other operation mode is implied from the command name (for example, .B unxz implies .BR \-\-decompress ). .TP .BR \-d ", " \-\-decompress ", " \-\-uncompress Decompress. .TP .BR \-t ", " \-\-test Test the integrity of compressed .IR files . This option is equivalent to .B "\-\-decompress \-\-stdout" except that the decompressed data is discarded instead of being written to standard output. No files are created or removed. .TP .BR \-l ", " \-\-list Print information about compressed .IR files . No uncompressed output is produced, and no files are created or removed. In list mode, the program cannot read the compressed data from standard input or from other unseekable sources. .IP "" The default listing shows basic information about .IR files , one file per line. To get more detailed information, use also the .B \-\-verbose option. For even more information, use .B \-\-verbose twice, but note that this may be slow, because getting all the extra information requires many seeks. The width of verbose output exceeds -80 characters, so piping the output to e.g.\& +80 characters, so piping the output to, for example, .B "less\ \-S" may be convenient if the terminal isn't wide enough. .IP "" The exact output may vary between .B xz versions and different locales. For machine-readable output, .B \-\-robot \-\-list should be used. . .SS "Operation modifiers" .TP .BR \-k ", " \-\-keep Don't delete the input files. +.IP "" +Since +.B xz +5.2.6, +this option also makes +.B xz +compress or decompress even if the input is +a symbolic link to a regular file, +has more than one hard link, +or has the setuid, setgid, or sticky bit set. +The setuid, setgid, and sticky bits are not copied +to the target file. +In earlier versions this was only done with +.BR \-\-force . .TP .BR \-f ", " \-\-force This option has several effects: .RS .IP \(bu 3 If the target file already exists, delete it before compressing or decompressing. .IP \(bu 3 Compress or decompress even if the input is a symbolic link to a regular file, has more than one hard link, or has the setuid, setgid, or sticky bit set. The setuid, setgid, and sticky bits are not copied to the target file. .IP \(bu 3 When used with .B \-\-decompress -.BR \-\-stdout +.B \-\-stdout and .B xz cannot recognize the type of the source file, copy the source file as is to standard output. This allows .B xzcat .B \-\-force to be used like .BR cat (1) for files that have not been compressed with .BR xz . Note that in future, .B xz might support new compressed file formats, which may make .B xz decompress more types of files instead of copying them as is to standard output. .BI \-\-format= format can be used to restrict .B xz to decompress only a single file format. .RE .TP .BR \-c ", " \-\-stdout ", " \-\-to\-stdout Write the compressed or decompressed data to standard output instead of a file. This implies .BR \-\-keep . .TP .B \-\-single\-stream Decompress only the first .B .xz stream, and silently ignore possible remaining input data following the stream. Normally such trailing garbage makes .B xz display an error. .IP "" .B xz never decompresses more than one stream from .B .lzma files or raw streams, but this option still makes .B xz ignore the possible trailing data after the .B .lzma file or raw stream. .IP "" This option has no effect if the operation mode is not .B \-\-decompress or .BR \-\-test . .TP .B \-\-no\-sparse Disable creation of sparse files. By default, if decompressing into a regular file, .B xz tries to make the file sparse if the decompressed data contains long sequences of binary zeros. It also works when writing to standard output as long as standard output is connected to a regular file and certain additional conditions are met to make it safe. Creating sparse files may save disk space and speed up the decompression by reducing the amount of disk I/O. .TP \fB\-S\fR \fI.suf\fR, \fB\-\-suffix=\fI.suf When compressing, use .I .suf as the suffix for the target file instead of .B .xz or .BR .lzma . If not writing to standard output and the source file already has the suffix .IR .suf , a warning is displayed and the file is skipped. .IP "" When decompressing, recognize files with the suffix .I .suf in addition to files with the .BR .xz , .BR .txz , .BR .lzma , or .B .tlz suffix. If the source file has the suffix .IR .suf , the suffix is removed to get the target filename. .IP "" When compressing or decompressing raw streams .RB ( \-\-format=raw ), the suffix must always be specified unless writing to standard output, because there is no default suffix for raw streams. .TP \fB\-\-files\fR[\fB=\fIfile\fR] Read the filenames to process from .IR file ; if .I file is omitted, filenames are read from standard input. Filenames must be terminated with the newline character. A dash .RB ( \- ) is taken as a regular filename; it doesn't mean standard input. If filenames are given also as command line arguments, they are processed before the filenames read from .IR file . .TP \fB\-\-files0\fR[\fB=\fIfile\fR] This is identical to \fB\-\-files\fR[\fB=\fIfile\fR] except that each filename must be terminated with the null character. . .SS "Basic file format and compression options" .TP \fB\-F\fR \fIformat\fR, \fB\-\-format=\fIformat Specify the file .I format to compress or decompress: .RS .TP .B auto This is the default. When compressing, .B auto is equivalent to .BR xz . When decompressing, the format of the input file is automatically detected. Note that raw streams (created with .BR \-\-format=raw ) cannot be auto-detected. .TP .B xz Compress to the .B .xz file format, or accept only .B .xz files when decompressing. .TP .BR lzma ", " alone Compress to the legacy .B .lzma file format, or accept only .B .lzma files when decompressing. The alternative name .B alone is provided for backwards compatibility with LZMA Utils. .TP .B raw Compress or uncompress a raw stream (no headers). This is meant for advanced users only. To decode raw streams, you need use .B \-\-format=raw and explicitly specify the filter chain, which normally would have been stored in the container headers. .RE .TP \fB\-C\fR \fIcheck\fR, \fB\-\-check=\fIcheck Specify the type of the integrity check. The check is calculated from the uncompressed data and stored in the .B .xz file. This option has an effect only when compressing into the .B .xz format; the .B .lzma format doesn't support integrity checks. The integrity check (if any) is verified when the .B .xz file is decompressed. .IP "" Supported .I check types: .RS .TP .B none Don't calculate an integrity check at all. This is usually a bad idea. This can be useful when integrity of the data is verified by other means anyway. .TP .B crc32 Calculate CRC32 using the polynomial from IEEE-802.3 (Ethernet). .TP .B crc64 Calculate CRC64 using the polynomial from ECMA-182. This is the default, since it is slightly better than CRC32 at detecting damaged files and the speed difference is negligible. .TP .B sha256 Calculate SHA-256. This is somewhat slower than CRC32 and CRC64. .RE .IP "" Integrity of the .B .xz headers is always verified with CRC32. It is not possible to change or disable it. .TP .B \-\-ignore\-check Don't verify the integrity check of the compressed data when decompressing. The CRC32 values in the .B .xz headers will still be verified normally. .IP "" .B "Do not use this option unless you know what you are doing." Possible reasons to use this option: .RS .IP \(bu 3 Trying to recover data from a corrupt .xz file. .IP \(bu 3 Speeding up decompression. This matters mostly with SHA-256 or with files that have compressed extremely well. It's recommended to not use this option for this purpose unless the file integrity is verified externally in some other way. .RE .TP .BR \-0 " ... " \-9 Select a compression preset level. The default is .BR \-6 . If multiple preset levels are specified, the last one takes effect. If a custom filter chain was already specified, setting a compression preset level clears the custom filter chain. .IP "" The differences between the presets are more significant than with .BR gzip (1) and .BR bzip2 (1). The selected compression settings determine the memory requirements of the decompressor, thus using a too high preset level might make it painful to decompress the file on an old system with little RAM. Specifically, .B "it's not a good idea to blindly use \-9 for everything" like it often is with .BR gzip (1) and .BR bzip2 (1). .RS .TP .BR "\-0" " ... " "\-3" These are somewhat fast presets. .B \-0 is sometimes faster than .B "gzip \-9" while compressing much better. The higher ones often have speed comparable to .BR bzip2 (1) with comparable or better compression ratio, although the results depend a lot on the type of data being compressed. .TP .BR "\-4" " ... " "\-6" Good to very good compression while keeping decompressor memory usage reasonable even for old systems. .B \-6 is the default, which is usually a good choice -e.g. for distributing files that need to be decompressible +for distributing files that need to be decompressible even on systems with only 16\ MiB RAM. .RB ( \-5e or .B \-6e may be worth considering too. See .BR \-\-extreme .) .TP .B "\-7 ... \-9" These are like .B \-6 but with higher compressor and decompressor memory requirements. These are useful only when compressing files bigger than 8\ MiB, 16\ MiB, and 32\ MiB, respectively. .RE .IP "" On the same hardware, the decompression speed is approximately a constant number of bytes of compressed data per second. In other words, the better the compression, the faster the decompression will usually be. This also means that the amount of uncompressed output produced per second can vary a lot. .IP "" The following table summarises the features of the presets: .RS .RS .PP .TS tab(;); c c c c c n n n n n. Preset;DictSize;CompCPU;CompMem;DecMem \-0;256 KiB;0;3 MiB;1 MiB \-1;1 MiB;1;9 MiB;2 MiB \-2;2 MiB;2;17 MiB;3 MiB \-3;4 MiB;3;32 MiB;5 MiB \-4;4 MiB;4;48 MiB;5 MiB \-5;8 MiB;5;94 MiB;9 MiB \-6;8 MiB;6;94 MiB;9 MiB \-7;16 MiB;6;186 MiB;17 MiB \-8;32 MiB;6;370 MiB;33 MiB \-9;64 MiB;6;674 MiB;65 MiB .TE .RE .RE .IP "" Column descriptions: .RS .IP \(bu 3 DictSize is the LZMA2 dictionary size. It is waste of memory to use a dictionary bigger than the size of the uncompressed file. This is why it is good to avoid using the presets .BR \-7 " ... " \-9 when there's no real need for them. At .B \-6 and lower, the amount of memory wasted is usually low enough to not matter. .IP \(bu 3 CompCPU is a simplified representation of the LZMA2 settings that affect compression speed. The dictionary size affects speed too, so while CompCPU is the same for levels .BR \-6 " ... " \-9 , higher levels still tend to be a little slower. To get even slower and thus possibly better compression, see .BR \-\-extreme . .IP \(bu 3 CompMem contains the compressor memory requirements in the single-threaded mode. It may vary slightly between .B xz versions. Memory requirements of some of the future multithreaded modes may be dramatically higher than that of the single-threaded mode. .IP \(bu 3 DecMem contains the decompressor memory requirements. That is, the compression settings determine the memory requirements of the decompressor. The exact decompressor memory usage is slightly more than the LZMA2 dictionary size, but the values in the table have been rounded up to the next full MiB. .RE .TP .BR \-e ", " \-\-extreme Use a slower variant of the selected compression preset level .RB ( \-0 " ... " \-9 ) to hopefully get a little bit better compression ratio, but with bad luck this can also make it worse. Decompressor memory usage is not affected, but compressor memory usage increases a little at preset levels .BR \-0 " ... " \-3 . .IP "" Since there are two presets with dictionary sizes 4\ MiB and 8\ MiB, the presets .B \-3e and .B \-5e use slightly faster settings (lower CompCPU) than .B \-4e and .BR \-6e , respectively. That way no two presets are identical. .RS .RS .PP .TS tab(;); c c c c c n n n n n. Preset;DictSize;CompCPU;CompMem;DecMem \-0e;256 KiB;8;4 MiB;1 MiB \-1e;1 MiB;8;13 MiB;2 MiB \-2e;2 MiB;8;25 MiB;3 MiB \-3e;4 MiB;7;48 MiB;5 MiB \-4e;4 MiB;8;48 MiB;5 MiB \-5e;8 MiB;7;94 MiB;9 MiB \-6e;8 MiB;8;94 MiB;9 MiB \-7e;16 MiB;8;186 MiB;17 MiB \-8e;32 MiB;8;370 MiB;33 MiB \-9e;64 MiB;8;674 MiB;65 MiB .TE .RE .RE .IP "" For example, there are a total of four presets that use 8\ MiB dictionary, whose order from the fastest to the slowest is .BR \-5 , .BR \-6 , .BR \-5e , and .BR \-6e . .TP .B \-\-fast .PD 0 .TP .B \-\-best .PD These are somewhat misleading aliases for .B \-0 and .BR \-9 , respectively. These are provided only for backwards compatibility with LZMA Utils. Avoid using these options. .TP .BI \-\-block\-size= size When compressing to the .B .xz format, split the input data into blocks of .I size bytes. The blocks are compressed independently from each other, which helps with multi-threading and makes limited random-access decompression possible. This option is typically used to override the default block size in multi-threaded mode, but this option can be used in single-threaded mode too. .IP "" In multi-threaded mode about three times .I size bytes will be allocated in each thread for buffering input and output. The default .I size is three times the LZMA2 dictionary size or 1 MiB, whichever is more. -Typically a good value is 2\-4 times +Typically a good value is 2\(en4 times the size of the LZMA2 dictionary or at least 1 MiB. Using .I size less than the LZMA2 dictionary size is waste of RAM because then the LZMA2 dictionary buffer will never get fully used. The sizes of the blocks are stored in the block headers, which a future version of .B xz will use for multi-threaded decompression. .IP "" In single-threaded mode no block splitting is done by default. Setting this option doesn't affect memory usage. No size information is stored in block headers, thus files created in single-threaded mode won't be identical to files created in multi-threaded mode. The lack of size information also means that a future version of .B xz won't be able decompress the files in multi-threaded mode. .TP .BI \-\-block\-list= sizes When compressing to the .B .xz format, start a new block after the given intervals of uncompressed data. .IP "" The uncompressed .I sizes of the blocks are specified as a comma-separated list. Omitting a size (two or more consecutive commas) is a shorthand to use the size of the previous block. .IP "" If the input file is bigger than the sum of .IR sizes , the last value in .I sizes is repeated until the end of the file. A special value of .B 0 may be used as the last value to indicate that the rest of the file should be encoded as a single block. .IP "" If one specifies .I sizes that exceed the encoder's block size (either the default value in threaded mode or the value specified with \fB\-\-block\-size=\fIsize\fR), the encoder will create additional blocks while keeping the boundaries specified in .IR sizes . For example, if one specifies .B \-\-block\-size=10MiB .B \-\-block\-list=5MiB,10MiB,8MiB,12MiB,24MiB and the input file is 80 MiB, one will get 11 blocks: 5, 10, 8, 10, 2, 10, 10, 4, 10, 10, and 1 MiB. .IP "" In multi-threaded mode the sizes of the blocks are stored in the block headers. This isn't done in single-threaded mode, so the encoded output won't be identical to that of the multi-threaded mode. .TP .BI \-\-flush\-timeout= timeout When compressing, if more than .I timeout milliseconds (a positive integer) has passed since the previous flush and reading more input would block, all the pending input data is flushed from the encoder and made available in the output stream. This can be useful if .B xz is used to compress data that is streamed over a network. Small .I timeout values make the data available at the receiving end with a small delay, but large .I timeout values give better compression ratio. .IP "" This feature is disabled by default. If this option is specified more than once, the last one takes effect. The special .I timeout value of .B 0 can be used to explicitly disable this feature. .IP "" This feature is not available on non-POSIX systems. .IP "" .\" FIXME .B "This feature is still experimental." Currently .B xz is unsuitable for decompressing the stream in real time due to how .B xz does buffering. .TP .BI \-\-memlimit\-compress= limit Set a memory usage limit for compression. If this option is specified multiple times, the last one takes effect. .IP "" If the compression settings exceed the .IR limit , .B xz will adjust the settings downwards so that the limit is no longer exceeded and display a notice that automatic adjustment was done. Such adjustments are not made when compressing with .B \-\-format=raw or if .B \-\-no\-adjust has been specified. In those cases, an error is displayed and .B xz will exit with exit status 1. .IP "" The .I limit can be specified in multiple ways: .RS .IP \(bu 3 The .I limit can be an absolute value in bytes. Using an integer suffix like .B MiB can be useful. Example: .B "\-\-memlimit\-compress=80MiB" .IP \(bu 3 The .I limit can be specified as a percentage of total physical memory (RAM). This can be useful especially when setting the .B XZ_DEFAULTS environment variable in a shell initialization script that is shared between different computers. That way the limit is automatically bigger on systems with more memory. Example: .B "\-\-memlimit\-compress=70%" .IP \(bu 3 The .I limit can be reset back to its default value by setting it to .BR 0 . This is currently equivalent to setting the .I limit to .B max (no memory usage limit). Once multithreading support has been implemented, there may be a difference between .B 0 and .B max for the multithreaded case, so it is recommended to use .B 0 instead of .B max until the details have been decided. .RE .IP "" For 32-bit -.BR xz +.B xz there is a special case: if the .I limit would be over .BR "4020\ MiB" , the .I limit is set to .BR "4020\ MiB" . +On MIPS32 +.B "2000\ MiB" +is used instead. (The values .B 0 and .B max aren't affected by this. A similar feature doesn't exist for decompression.) This can be helpful when a 32-bit executable has access -to 4\ GiB address space while hopefully doing no harm in other situations. +to 4\ GiB address space (2 GiB on MIPS32) +while hopefully doing no harm in other situations. .IP "" See also the section .BR "Memory usage" . .TP .BI \-\-memlimit\-decompress= limit Set a memory usage limit for decompression. This also affects the .B \-\-list mode. If the operation is not possible without exceeding the .IR limit , .B xz will display an error and decompressing the file will fail. See .BI \-\-memlimit\-compress= limit for possible ways to specify the .IR limit . .TP \fB\-M\fR \fIlimit\fR, \fB\-\-memlimit=\fIlimit\fR, \fB\-\-memory=\fIlimit -This is equivalent to specifying \fB\-\-memlimit\-compress=\fIlimit +This is equivalent to specifying +.BI \-\-memlimit\-compress= limit \fB\-\-memlimit\-decompress=\fIlimit\fR. .TP .B \-\-no\-adjust Display an error and exit if the compression settings exceed the memory usage limit. The default is to adjust the settings downwards so that the memory usage limit is not exceeded. Automatic adjusting is always disabled when creating raw streams .RB ( \-\-format=raw ). .TP \fB\-T\fR \fIthreads\fR, \fB\-\-threads=\fIthreads Specify the number of worker threads to use. Setting .I threads to a special value .B 0 makes .B xz use as many threads as there are CPU cores on the system. The actual number of threads can be less than .I threads if the input file is not big enough for threading with the given settings or if using more threads would exceed the memory usage limit. .IP "" Currently the only threading method is to split the input into blocks and compress them independently from each other. The default block size depends on the compression level and can be overridden with the .BI \-\-block\-size= size option. .IP "" Threaded decompression hasn't been implemented yet. It will only work on files that contain multiple blocks with size information in block headers. All files compressed in multi-threaded mode meet this condition, but files compressed in single-threaded mode don't even if .BI \-\-block\-size= size is used. . .SS "Custom compressor filter chains" A custom filter chain allows specifying the compression settings in detail instead of relying on the settings associated to the presets. When a custom filter chain is specified, -preset options (\fB\-0\fR ... \fB\-9\fR and \fB\-\-extreme\fR) +preset options +.RB ( \-0 +\&...\& +.B \-9 +and +.BR \-\-extreme ) earlier on the command line are forgotten. If a preset option is specified after one or more custom filter chain options, the new preset takes effect and the custom filter chain options specified earlier are forgotten. .PP A filter chain is comparable to piping on the command line. When compressing, the uncompressed input goes to the first filter, whose output goes to the next filter (if any). The output of the last filter gets written to the compressed file. The maximum number of filters in the chain is four, but typically a filter chain has only one or two filters. .PP Many filters have limitations on where they can be in the filter chain: some filters can work only as the last filter in the chain, some only as a non-last filter, and some work in any position in the chain. Depending on the filter, this limitation is either inherent to the filter design or exists to prevent security issues. .PP A custom filter chain is specified by using one or more filter options in the order they are wanted in the filter chain. That is, the order of filter options is significant! When decoding raw streams .RB ( \-\-format=raw ), the filter chain is specified in the same order as it was specified when compressing. .PP Filters take filter-specific .I options as a comma-separated list. Extra commas in .I options are ignored. Every option has a default value, so you need to specify only those you want to change. .PP To see the whole filter chain and .IR options , use .B "xz \-vv" (that is, use .B \-\-verbose twice). This works also for viewing the filter chain options used by presets. .TP \fB\-\-lzma1\fR[\fB=\fIoptions\fR] .PD 0 .TP \fB\-\-lzma2\fR[\fB=\fIoptions\fR] .PD Add LZMA1 or LZMA2 filter to the filter chain. These filters can be used only as the last filter in the chain. .IP "" LZMA1 is a legacy filter, which is supported almost solely due to the legacy .B .lzma file format, which supports only LZMA1. LZMA2 is an updated version of LZMA1 to fix some practical issues of LZMA1. The .B .xz format uses LZMA2 and doesn't support LZMA1 at all. Compression speed and ratios of LZMA1 and LZMA2 are practically the same. .IP "" LZMA1 and LZMA2 share the same set of .IR options : .RS .TP .BI preset= preset Reset all LZMA1 or LZMA2 .I options to .IR preset . .I Preset consist of an integer, which may be followed by single-letter preset modifiers. The integer can be from .B 0 to .BR 9 , -matching the command line options \fB\-0\fR ... \fB\-9\fR. +matching the command line options +.B \-0 +\&...\& +.BR \-9 . The only supported modifier is currently .BR e , which matches .BR \-\-extreme . If no .B preset is specified, the default values of LZMA1 or LZMA2 .I options are taken from the preset .BR 6 . .TP .BI dict= size Dictionary (history buffer) .I size indicates how many bytes of the recently processed uncompressed data is kept in memory. The algorithm tries to find repeating byte sequences (matches) in the uncompressed data, and replace them with references to the data currently in the dictionary. The bigger the dictionary, the higher is the chance to find a match. Thus, increasing dictionary .I size usually improves compression ratio, but a dictionary bigger than the uncompressed file is waste of memory. .IP "" Typical dictionary .I size is from 64\ KiB to 64\ MiB. The minimum is 4\ KiB. The maximum for compression is currently 1.5\ GiB (1536\ MiB). The decompressor already supports dictionaries up to one byte less than 4\ GiB, which is the maximum for the LZMA1 and LZMA2 stream formats. .IP "" Dictionary .I size and match finder .RI ( mf ) together determine the memory usage of the LZMA1 or LZMA2 encoder. The same (or bigger) dictionary .I size is required for decompressing that was used when compressing, thus the memory usage of the decoder is determined by the dictionary size used when compressing. The .B .xz headers store the dictionary .I size either as .RI "2^" n or .RI "2^" n " + 2^(" n "\-1)," so these .I sizes are somewhat preferred for compression. Other .I sizes will get rounded up when stored in the .B .xz headers. .TP .BI lc= lc Specify the number of literal context bits. The minimum is 0 and the maximum is 4; the default is 3. In addition, the sum of .I lc and .I lp must not exceed 4. .IP "" All bytes that cannot be encoded as matches are encoded as literals. That is, literals are simply 8-bit bytes that are encoded one at a time. .IP "" The literal coding makes an assumption that the highest .I lc bits of the previous uncompressed byte correlate with the next byte. -E.g. in typical English text, an upper-case letter is +For example, in typical English text, an upper-case letter is often followed by a lower-case letter, and a lower-case letter is usually followed by another lower-case letter. In the US-ASCII character set, the highest three bits are 010 for upper-case letters and 011 for lower-case letters. When .I lc is at least 3, the literal coding can take advantage of this property in the uncompressed data. .IP "" The default value (3) is usually good. If you want maximum compression, test .BR lc=4 . Sometimes it helps a little, and sometimes it makes compression worse. -If it makes it worse, test e.g.\& +If it makes it worse, test .B lc=2 too. .TP .BI lp= lp Specify the number of literal position bits. The minimum is 0 and the maximum is 4; the default is 0. .IP "" .I Lp affects what kind of alignment in the uncompressed data is assumed when encoding literals. See .I pb below for more information about alignment. .TP .BI pb= pb Specify the number of position bits. The minimum is 0 and the maximum is 4; the default is 2. .IP "" .I Pb affects what kind of alignment in the uncompressed data is assumed in general. The default means four-byte alignment .RI (2^ pb =2^2=4), which is often a good choice when there's no better guess. .IP "" -When the aligment is known, setting +When the alignment is known, setting .I pb accordingly may reduce the file size a little. -E.g. with text files having one-byte +For example, with text files having one-byte alignment (US-ASCII, ISO-8859-*, UTF-8), setting .B pb=0 can improve compression slightly. For UTF-16 text, .B pb=1 is a good choice. If the alignment is an odd number like 3 bytes, .B pb=0 might be the best choice. .IP "" Even though the assumed alignment can be adjusted with .I pb and .IR lp , LZMA1 and LZMA2 still slightly favor 16-byte alignment. It might be worth taking into account when designing file formats that are likely to be often compressed with LZMA1 or LZMA2. .TP .BI mf= mf Match finder has a major effect on encoder speed, memory usage, and compression ratio. Usually Hash Chain match finders are faster than Binary Tree match finders. The default depends on the .IR preset : 0 uses .BR hc3 , -1\-3 +1\(en3 use .BR hc4 , and the rest use .BR bt4 . .IP "" The following match finders are supported. The memory usage formulas below are rough approximations, which are closest to the reality when .I dict is a power of two. .RS .TP .B hc3 Hash Chain with 2- and 3-byte hashing .br Minimum value for .IR nice : 3 .br Memory usage: .br .I dict * 7.5 (if .I dict <= 16 MiB); .br .I dict * 5.5 + 64 MiB (if .I dict > 16 MiB) .TP .B hc4 Hash Chain with 2-, 3-, and 4-byte hashing .br Minimum value for .IR nice : 4 .br Memory usage: .br .I dict * 7.5 (if .I dict <= 32 MiB); .br .I dict * 6.5 (if .I dict > 32 MiB) .TP .B bt2 Binary Tree with 2-byte hashing .br Minimum value for .IR nice : 2 .br Memory usage: .I dict * 9.5 .TP .B bt3 Binary Tree with 2- and 3-byte hashing .br Minimum value for .IR nice : 3 .br Memory usage: .br .I dict * 11.5 (if .I dict <= 16 MiB); .br .I dict * 9.5 + 64 MiB (if .I dict > 16 MiB) .TP .B bt4 Binary Tree with 2-, 3-, and 4-byte hashing .br Minimum value for .IR nice : 4 .br Memory usage: .br .I dict * 11.5 (if .I dict <= 32 MiB); .br .I dict * 10.5 (if .I dict > 32 MiB) .RE .TP .BI mode= mode Compression .I mode specifies the method to analyze the data produced by the match finder. Supported .I modes are .B fast and .BR normal . The default is .B fast for .I presets -0\-3 and +0\(en3 and .B normal for .I presets -4\-9. +4\(en9. .IP "" Usually .B fast is used with Hash Chain match finders and .B normal with Binary Tree match finders. This is also what the .I presets do. .TP .BI nice= nice Specify what is considered to be a nice length for a match. Once a match of at least .I nice bytes is found, the algorithm stops looking for possibly better matches. .IP "" .I Nice -can be 2\-273 bytes. +can be 2\(en273 bytes. Higher values tend to give better compression ratio at the expense of speed. The default depends on the .IR preset . .TP .BI depth= depth Specify the maximum search depth in the match finder. The default is the special value of 0, which makes the compressor determine a reasonable .I depth from .I mf and .IR nice . .IP "" Reasonable .I depth -for Hash Chains is 4\-100 and 16\-1000 for Binary Trees. +for Hash Chains is 4\(en100 and 16\(en1000 for Binary Trees. Using very high values for .I depth can make the encoder extremely slow with some files. Avoid setting the .I depth over 1000 unless you are prepared to interrupt the compression in case it is taking far too long. .RE .IP "" When decoding raw streams .RB ( \-\-format=raw ), LZMA2 needs only the dictionary .IR size . LZMA1 needs also .IR lc , .IR lp , and .IR pb . .TP \fB\-\-x86\fR[\fB=\fIoptions\fR] .PD 0 .TP \fB\-\-powerpc\fR[\fB=\fIoptions\fR] .TP \fB\-\-ia64\fR[\fB=\fIoptions\fR] .TP \fB\-\-arm\fR[\fB=\fIoptions\fR] .TP \fB\-\-armthumb\fR[\fB=\fIoptions\fR] .TP \fB\-\-sparc\fR[\fB=\fIoptions\fR] .PD Add a branch/call/jump (BCJ) filter to the filter chain. These filters can be used only as a non-last filter in the filter chain. .IP "" A BCJ filter converts relative addresses in the machine code to their absolute counterparts. This doesn't change the size of the data, but it increases redundancy, -which can help LZMA2 to produce 0\-15\ % smaller +which can help LZMA2 to produce 0\(en15\ % smaller .B .xz file. The BCJ filters are always reversible, so using a BCJ filter for wrong type of data doesn't cause any data loss, although it may make the compression ratio slightly worse. .IP "" It is fine to apply a BCJ filter on a whole executable; there's no need to apply it only on the executable section. Applying a BCJ filter on an archive that contains both executable and non-executable files may or may not give good results, so it generally isn't good to blindly apply a BCJ filter when compressing binary packages for distribution. .IP "" These BCJ filters are very fast and use insignificant amount of memory. If a BCJ filter improves compression ratio of a file, it can improve decompression speed at the same time. This is because, on the same hardware, the decompression speed of LZMA2 is roughly a fixed number of bytes of compressed data per second. .IP "" These BCJ filters have known problems related to the compression ratio: .RS .IP \(bu 3 Some types of files containing executable code -(e.g. object files, static libraries, and Linux kernel modules) +(for example, object files, static libraries, and Linux kernel modules) have the addresses in the instructions filled with filler values. These BCJ filters will still do the address conversion, which will make the compression worse with these files. .IP \(bu 3 Applying a BCJ filter on an archive containing multiple similar executables can make the compression ratio worse than not using a BCJ filter. This is because the BCJ filter doesn't detect the boundaries of the executable files, and doesn't reset the address conversion counter for each executable. .RE .IP "" Both of the above problems will be fixed in the future in a new filter. The old BCJ filters will still be useful in embedded systems, because the decoder of the new filter will be bigger and use more memory. .IP "" Different instruction sets have different alignment: .RS .RS .PP .TS tab(;); l n l l n l. Filter;Alignment;Notes x86;1;32-bit or 64-bit x86 PowerPC;4;Big endian only ARM;4;Little endian only ARM-Thumb;2;Little endian only IA-64;16;Big or little endian SPARC;4;Big or little endian .TE .RE .RE .IP "" Since the BCJ-filtered data is usually compressed with LZMA2, the compression ratio may be improved slightly if the LZMA2 options are set to match the alignment of the selected BCJ filter. For example, with the IA-64 filter, it's good to set .B pb=4 with LZMA2 (2^4=16). The x86 filter is an exception; it's usually good to stick to LZMA2's default four-byte alignment when compressing x86 executables. .IP "" All BCJ filters support the same .IR options : .RS .TP .BI start= offset Specify the start .I offset that is used when converting between relative and absolute addresses. The .I offset must be a multiple of the alignment of the filter (see the table above). The default is zero. In practice, the default is good; specifying a custom .I offset is almost never useful. .RE .TP \fB\-\-delta\fR[\fB=\fIoptions\fR] Add the Delta filter to the filter chain. The Delta filter can be only used as a non-last filter in the filter chain. .IP "" Currently only simple byte-wise delta calculation is supported. -It can be useful when compressing e.g. uncompressed bitmap images +It can be useful when compressing, for example, uncompressed bitmap images or uncompressed PCM audio. However, special purpose algorithms may give significantly better results than Delta + LZMA2. This is true especially with audio, -which compresses faster and better e.g. with +which compresses faster and better, for example, with .BR flac (1). .IP "" Supported .IR options : .RS .TP .BI dist= distance Specify the .I distance of the delta calculation in bytes. .I distance -must be 1\-256. +must be 1\(en256. The default is 1. .IP "" For example, with .B dist=2 and eight-byte input A1 B1 A2 B3 A3 B5 A4 B7, the output will be A1 B1 01 02 01 02 01 02. .RE . .SS "Other options" .TP .BR \-q ", " \-\-quiet Suppress warnings and notices. Specify this twice to suppress errors too. This option has no effect on the exit status. That is, even if a warning was suppressed, the exit status to indicate a warning is still used. .TP .BR \-v ", " \-\-verbose Be verbose. If standard error is connected to a terminal, .B xz will display a progress indicator. Specifying .B \-\-verbose twice will give even more verbose output. .IP "" The progress indicator shows the following information: .RS .IP \(bu 3 Completion percentage is shown if the size of the input file is known. That is, the percentage cannot be shown in pipes. .IP \(bu 3 Amount of compressed data produced (compressing) or consumed (decompressing). .IP \(bu 3 Amount of uncompressed data consumed (compressing) or produced (decompressing). .IP \(bu 3 Compression ratio, which is calculated by dividing the amount of compressed data processed so far by the amount of uncompressed data processed so far. .IP \(bu 3 Compression or decompression speed. This is measured as the amount of uncompressed data consumed (compression) or produced (decompression) per second. It is shown after a few seconds have passed since .B xz started processing the file. .IP \(bu 3 Elapsed time in the format M:SS or H:MM:SS. .IP \(bu 3 Estimated remaining time is shown only when the size of the input file is known and a couple of seconds have already passed since .B xz started processing the file. The time is shown in a less precise format which -never has any colons, e.g. 2 min 30 s. +never has any colons, for example, 2 min 30 s. .RE .IP "" When standard error is not a terminal, .B \-\-verbose will make .B xz print the filename, compressed size, uncompressed size, compression ratio, and possibly also the speed and elapsed time on a single line to standard error after compressing or decompressing the file. The speed and elapsed time are included only when the operation took at least a few seconds. -If the operation didn't finish, e.g. due to user interruption, +If the operation didn't finish, for example, due to user interruption, also the completion percentage is printed if the size of the input file is known. .TP .BR \-Q ", " \-\-no\-warn Don't set the exit status to 2 even if a condition worth a warning was detected. This option doesn't affect the verbosity level, thus both .B \-\-quiet and .B \-\-no\-warn have to be used to not display warnings and to not alter the exit status. .TP .B \-\-robot Print messages in a machine-parsable format. This is intended to ease writing frontends that want to use .B xz instead of liblzma, which may be the case with various scripts. The output with this option enabled is meant to be stable across .B xz releases. See the section .B "ROBOT MODE" for details. .TP -.BR \-\-info\-memory +.B \-\-info\-memory Display, in human-readable format, how much physical memory (RAM) .B xz thinks the system has and the memory usage limits for compression and decompression, and exit successfully. .TP .BR \-h ", " \-\-help Display a help message describing the most commonly used options, and exit successfully. .TP .BR \-H ", " \-\-long\-help Display a help message describing all features of .BR xz , and exit successfully .TP .BR \-V ", " \-\-version Display the version number of .B xz and liblzma in human readable format. To get machine-parsable output, specify .B \-\-robot before .BR \-\-version . . .SH "ROBOT MODE" The robot mode is activated with the .B \-\-robot option. It makes the output of .B xz easier to parse by other programs. Currently .B \-\-robot is supported only together with .BR \-\-version , .BR \-\-info\-memory , and .BR \-\-list . It will be supported for compression and decompression in the future. . .SS Version .B "xz \-\-robot \-\-version" will print the version number of .B xz and liblzma in the following format: .PP .BI XZ_VERSION= XYYYZZZS .br .BI LIBLZMA_VERSION= XYYYZZZS .TP .I X Major version. .TP .I YYY Minor version. Even numbers are stable. Odd numbers are alpha or beta versions. .TP .I ZZZ Patch level for stable releases or just a counter for development releases. .TP .I S Stability. 0 is alpha, 1 is beta, and 2 is stable. .I S should be always 2 when .I YYY is even. .PP .I XYYYZZZS are the same on both lines if .B xz and liblzma are from the same XZ Utils release. .PP Examples: 4.999.9beta is .B 49990091 and 5.0.0 is .BR 50000002 . . .SS "Memory limit information" .B "xz \-\-robot \-\-info\-memory" prints a single line with three tab-separated columns: .IP 1. 4 Total amount of physical memory (RAM) in bytes .IP 2. 4 Memory usage limit for compression in bytes. A special value of zero indicates the default setting, which for single-threaded mode is the same as no limit. .IP 3. 4 Memory usage limit for decompression in bytes. A special value of zero indicates the default setting, which for single-threaded mode is the same as no limit. .PP In the future, the output of .B "xz \-\-robot \-\-info\-memory" may have more columns, but never more than a single line. . .SS "List mode" .B "xz \-\-robot \-\-list" uses tab-separated output. The first column of every line has a string that indicates the type of the information found on that line: .TP .B name This is always the first line when starting to list a file. The second column on the line is the filename. .TP .B file This line contains overall information about the .B .xz file. This line is always printed after the .B name line. .TP .B stream This line type is used only when .B \-\-verbose was specified. There are as many .B stream lines as there are streams in the .B .xz file. .TP .B block This line type is used only when .B \-\-verbose was specified. There are as many .B block lines as there are blocks in the .B .xz file. The .B block lines are shown after all the .B stream lines; different line types are not interleaved. .TP .B summary This line type is used only when .B \-\-verbose was specified twice. This line is printed after all .B block lines. Like the .B file line, the .B summary line contains overall information about the .B .xz file. .TP .B totals This line is always the very last line of the list output. It shows the total counts and sizes. .PP The columns of the .B file lines: .PD 0 .RS .IP 2. 4 Number of streams in the file .IP 3. 4 Total number of blocks in the stream(s) .IP 4. 4 Compressed size of the file .IP 5. 4 Uncompressed size of the file .IP 6. 4 -Compression ratio, for example -.BR 0.123. +Compression ratio, for example, +.BR 0.123 . If ratio is over 9.999, three dashes .RB ( \-\-\- ) are displayed instead of the ratio. .IP 7. 4 Comma-separated list of integrity check names. The following strings are used for the known check types: .BR None , .BR CRC32 , .BR CRC64 , and .BR SHA\-256 . For unknown check types, .BI Unknown\- N is used, where .I N is the Check ID as a decimal number (one or two digits). .IP 8. 4 Total size of stream padding in the file .RE .PD .PP The columns of the .B stream lines: .PD 0 .RS .IP 2. 4 Stream number (the first stream is 1) .IP 3. 4 Number of blocks in the stream .IP 4. 4 Compressed start offset .IP 5. 4 Uncompressed start offset .IP 6. 4 Compressed size (does not include stream padding) .IP 7. 4 Uncompressed size .IP 8. 4 Compression ratio .IP 9. 4 Name of the integrity check .IP 10. 4 Size of stream padding .RE .PD .PP The columns of the .B block lines: .PD 0 .RS .IP 2. 4 Number of the stream containing this block .IP 3. 4 Block number relative to the beginning of the stream (the first block is 1) .IP 4. 4 Block number relative to the beginning of the file .IP 5. 4 Compressed start offset relative to the beginning of the file .IP 6. 4 Uncompressed start offset relative to the beginning of the file .IP 7. 4 Total compressed size of the block (includes headers) .IP 8. 4 Uncompressed size .IP 9. 4 Compression ratio .IP 10. 4 Name of the integrity check .RE .PD .PP If .B \-\-verbose was specified twice, additional columns are included on the .B block lines. These are not displayed with a single .BR \-\-verbose , because getting this information requires many seeks and can thus be slow: .PD 0 .RS .IP 11. 4 Value of the integrity check in hexadecimal .IP 12. 4 Block header size .IP 13. 4 Block flags: .B c indicates that compressed size is present, and .B u indicates that uncompressed size is present. If the flag is not set, a dash .RB ( \- ) is shown instead to keep the string length fixed. New flags may be added to the end of the string in the future. .IP 14. 4 Size of the actual compressed data in the block (this excludes the block header, block padding, and check fields) .IP 15. 4 Amount of memory (in bytes) required to decompress this block with this .B xz version .IP 16. 4 Filter chain. Note that most of the options used at compression time cannot be known, because only the options that are needed for decompression are stored in the .B .xz headers. .RE .PD .PP The columns of the .B summary lines: .PD 0 .RS .IP 2. 4 Amount of memory (in bytes) required to decompress this file with this .B xz version .IP 3. 4 .B yes or .B no indicating if all block headers have both compressed size and uncompressed size stored in them .PP .I Since .B xz .I 5.1.2alpha: .IP 4. 4 Minimum .B xz version required to decompress the file .RE .PD .PP The columns of the .B totals line: .PD 0 .RS .IP 2. 4 Number of streams .IP 3. 4 Number of blocks .IP 4. 4 Compressed size .IP 5. 4 Uncompressed size .IP 6. 4 Average compression ratio .IP 7. 4 Comma-separated list of integrity check names that were present in the files .IP 8. 4 Stream padding size .IP 9. 4 Number of files. This is here to keep the order of the earlier columns the same as on .B file lines. .PD .RE .PP If .B \-\-verbose was specified twice, additional columns are included on the .B totals line: .PD 0 .RS .IP 10. 4 Maximum amount of memory (in bytes) required to decompress the files with this .B xz version .IP 11. 4 .B yes or .B no indicating if all block headers have both compressed size and uncompressed size stored in them .PP .I Since .B xz .I 5.1.2alpha: .IP 12. 4 Minimum .B xz version required to decompress the file .RE .PD .PP Future versions may add new line types and new columns can be added to the existing line types, but the existing columns won't be changed. . .SH "EXIT STATUS" .TP .B 0 All is good. .TP .B 1 An error occurred. .TP .B 2 Something worth a warning occurred, but no actual errors occurred. .PP Notices (not warnings or errors) printed on standard error don't affect the exit status. . .SH ENVIRONMENT .B xz parses space-separated lists of options from the environment variables .B XZ_DEFAULTS and .BR XZ_OPT , in this order, before parsing the options from the command line. Note that only options are parsed from the environment variables; all non-options are silently ignored. Parsing is done with .BR getopt_long (3) which is used also for the command line arguments. .TP .B XZ_DEFAULTS User-specific or system-wide default options. Typically this is set in a shell initialization script to enable .BR xz 's memory usage limiter by default. Excluding shell initialization scripts and similar special cases, scripts must never set or unset .BR XZ_DEFAULTS . .TP .B XZ_OPT This is for passing options to .B xz when it is not possible to set the options directly on the .B xz command line. -This is the case e.g. when +This is the case when .B xz -is run by a script or tool, e.g. GNU +is run by a script or tool, for example, GNU .BR tar (1): .RS .RS .PP .nf .ft CW XZ_OPT=\-2v tar caf foo.tar.xz foo .ft R .fi .RE .RE .IP "" Scripts may use -.B XZ_OPT -e.g. to set script-specific default compression options. +.BR XZ_OPT , +for example, to set script-specific default compression options. It is still recommended to allow users to override .B XZ_OPT -if that is reasonable, e.g. in +if that is reasonable. +For example, in .BR sh (1) scripts one may use something like this: .RS .RS .PP .nf .ft CW XZ_OPT=${XZ_OPT\-"\-7e"} export XZ_OPT .ft R .fi .RE .RE . .SH "LZMA UTILS COMPATIBILITY" The command line syntax of .B xz is practically a superset of .BR lzma , .BR unlzma , and -.BR lzcat +.B lzcat as found from LZMA Utils 4.32.x. In most cases, it is possible to replace LZMA Utils with XZ Utils without breaking existing scripts. There are some incompatibilities though, which may sometimes cause problems. . .SS "Compression preset levels" The numbering of the compression level presets is not identical in .B xz and LZMA Utils. The most important difference is how dictionary sizes are mapped to different presets. Dictionary size is roughly equal to the decompressor memory usage. .RS .PP .TS tab(;); c c c c n n. Level;xz;LZMA Utils \-0;256 KiB;N/A \-1;1 MiB;64 KiB \-2;2 MiB;1 MiB \-3;4 MiB;512 KiB \-4;4 MiB;1 MiB \-5;8 MiB;2 MiB \-6;8 MiB;4 MiB \-7;16 MiB;8 MiB \-8;32 MiB;16 MiB \-9;64 MiB;32 MiB .TE .RE .PP The dictionary size differences affect the compressor memory usage too, but there are some other differences between LZMA Utils and XZ Utils, which make the difference even bigger: .RS .PP .TS tab(;); c c c c n n. Level;xz;LZMA Utils 4.32.x \-0;3 MiB;N/A \-1;9 MiB;2 MiB \-2;17 MiB;12 MiB \-3;32 MiB;12 MiB \-4;48 MiB;16 MiB \-5;94 MiB;26 MiB \-6;94 MiB;45 MiB \-7;186 MiB;83 MiB \-8;370 MiB;159 MiB \-9;674 MiB;311 MiB .TE .RE .PP The default preset level in LZMA Utils is .B \-7 while in XZ Utils it is .BR \-6 , so both use an 8 MiB dictionary by default. . .SS "Streamed vs. non-streamed .lzma files" The uncompressed size of the file can be stored in the .B .lzma header. LZMA Utils does that when compressing regular files. The alternative is to mark that uncompressed size is unknown and use end-of-payload marker to indicate where the decompressor should stop. LZMA Utils uses this method when uncompressed size isn't known, -which is the case for example in pipes. +which is the case, for example, in pipes. .PP .B xz supports decompressing .B .lzma files with or without end-of-payload marker, but all .B .lzma files created by .B xz will use end-of-payload marker and have uncompressed size marked as unknown in the .B .lzma header. This may be a problem in some uncommon situations. For example, a .B .lzma decompressor in an embedded device might work only with files that have known uncompressed size. If you hit this problem, you need to use LZMA Utils or LZMA SDK to create .B .lzma files with known uncompressed size. . .SS "Unsupported .lzma files" The .B .lzma format allows .I lc values up to 8, and .I lp values up to 4. LZMA Utils can decompress files with any .I lc and .IR lp , but always creates files with .B lc=3 and .BR lp=0 . Creating files with other .I lc and .I lp is possible with .B xz and with LZMA SDK. .PP The implementation of the LZMA1 filter in liblzma requires that the sum of .I lc and .I lp must not exceed 4. Thus, .B .lzma files, which exceed this limitation, cannot be decompressed with .BR xz . .PP LZMA Utils creates only .B .lzma files which have a dictionary size of .RI "2^" n (a power of 2) but accepts files with any dictionary size. liblzma accepts only .B .lzma files which have a dictionary size of .RI "2^" n or .RI "2^" n " + 2^(" n "\-1)." This is to decrease false positives when detecting .B .lzma files. .PP These limitations shouldn't be a problem in practice, since practically all .B .lzma files have been compressed with settings that liblzma will accept. . .SS "Trailing garbage" When decompressing, LZMA Utils silently ignore everything after the first .B .lzma stream. In most situations, this is a bug. This also means that LZMA Utils don't support decompressing concatenated .B .lzma files. .PP If there is data left after the first .B .lzma stream, .B xz considers the file to be corrupt unless .B \-\-single\-stream was used. This may break obscure scripts which have assumed that trailing garbage is ignored. . .SH NOTES . .SS "Compressed output may vary" The exact compressed output produced from the same uncompressed input file may vary between XZ Utils versions even if compression options are identical. This is because the encoder can be improved (faster or better compression) without affecting the file format. The output can vary even between different builds of the same XZ Utils version, if different build options are used. .PP The above means that once .B \-\-rsyncable has been implemented, the resulting files won't necessarily be rsyncable unless both old and new files have been compressed with the same xz version. This problem can be fixed if a part of the encoder implementation is frozen to keep rsyncable output stable across xz versions. . .SS "Embedded .xz decompressors" Embedded .B .xz decompressor implementations like XZ Embedded don't necessarily support files created with integrity .I check types other than .B none and .BR crc32 . Since the default is .BR \-\-check=crc64 , you must use .B \-\-check=none or .B \-\-check=crc32 when creating files for embedded systems. .PP Outside embedded systems, all .B .xz format decompressors support all the .I check types, or at least are able to decompress the file without verifying the integrity check if the particular .I check is not supported. .PP XZ Embedded supports BCJ filters, but only with the default start offset. . .SH EXAMPLES . .SS Basics Compress the file .I foo into .I foo.xz using the default compression level .RB ( \-6 ), and remove .I foo if compression is successful: .RS .PP .nf .ft CW xz foo .ft R .fi .RE .PP Decompress .I bar.xz into .I bar and don't remove .I bar.xz even if decompression is successful: .RS .PP .nf .ft CW xz \-dk bar.xz .ft R .fi .RE .PP Create .I baz.tar.xz with the preset .B \-4e .RB ( "\-4 \-\-extreme" ), -which is slower than e.g. the default +which is slower than the default .BR \-6 , but needs less memory for compression and decompression (48\ MiB and 5\ MiB, respectively): .RS .PP .nf .ft CW tar cf \- baz | xz \-4e > baz.tar.xz .ft R .fi .RE .PP A mix of compressed and uncompressed files can be decompressed to standard output with a single command: .RS .PP .nf .ft CW xz \-dcf a.txt b.txt.xz c.txt d.txt.lzma > abcd.txt .ft R .fi .RE . .SS "Parallel compression of many files" On GNU and *BSD, .BR find (1) and .BR xargs (1) can be used to parallelize compression of many files: .RS .PP .nf .ft CW find . \-type f \e! \-name '*.xz' \-print0 \e | xargs \-0r \-P4 \-n16 xz \-T1 .ft R .fi .RE .PP The .B \-P option to .BR xargs (1) sets the number of parallel .B xz processes. The best value for the .B \-n option depends on how many files there are to be compressed. If there are only a couple of files, the value should probably be 1; with tens of thousands of files, 100 or even more may be appropriate to reduce the number of .B xz processes that .BR xargs (1) will eventually create. .PP The option .B \-T1 for .B xz is there to force it to single-threaded mode, because .BR xargs (1) is used to control the amount of parallelization. . .SS "Robot mode" Calculate how many bytes have been saved in total after compressing multiple files: .RS .PP .nf .ft CW xz \-\-robot \-\-list *.xz | awk '/^totals/{print $5\-$4}' .ft R .fi .RE .PP A script may want to know that it is using new enough .BR xz . The following .BR sh (1) script checks that the version number of the .B xz tool is at least 5.0.0. This method is compatible with old beta versions, which didn't support the .B \-\-robot option: .RS .PP .nf .ft CW if ! eval "$(xz \-\-robot \-\-version 2> /dev/null)" || [ "$XZ_VERSION" \-lt 50000002 ]; then echo "Your xz is too old." fi unset XZ_VERSION LIBLZMA_VERSION .ft R .fi .RE .PP Set a memory usage limit for decompression using .BR XZ_OPT , but if a limit has already been set, don't increase it: .RS .PP .nf .ft CW -NEWLIM=$((123 << 20)) # 123 MiB +NEWLIM=$((123 << 20))\ \ # 123 MiB OLDLIM=$(xz \-\-robot \-\-info\-memory | cut \-f3) if [ $OLDLIM \-eq 0 \-o $OLDLIM \-gt $NEWLIM ]; then XZ_OPT="$XZ_OPT \-\-memlimit\-decompress=$NEWLIM" export XZ_OPT fi .ft R .fi .RE . .SS "Custom compressor filter chains" The simplest use for custom filter chains is customizing a LZMA2 preset. This can be useful, because the presets cover only a subset of the potentially useful combinations of compression settings. .PP The CompCPU columns of the tables from the descriptions of the options .BR "\-0" " ... " "\-9" and .B \-\-extreme are useful when customizing LZMA2 presets. Here are the relevant parts collected from those two tables: .RS .PP .TS tab(;); c c n n. Preset;CompCPU \-0;0 \-1;1 \-2;2 \-3;3 \-4;4 \-5;5 \-6;6 \-5e;7 \-6e;8 .TE .RE .PP If you know that a file requires -somewhat big dictionary (e.g. 32 MiB) to compress well, +somewhat big dictionary (for example, 32\ MiB) to compress well, but you want to compress it quicker than .B "xz \-8" -would do, a preset with a low CompCPU value (e.g. 1) +would do, a preset with a low CompCPU value (for example, 1) can be modified to use a bigger dictionary: .RS .PP .nf .ft CW xz \-\-lzma2=preset=1,dict=32MiB foo.tar .ft R .fi .RE .PP With certain files, the above command may be faster than .B "xz \-6" while compressing significantly better. However, it must be emphasized that only some files benefit from a big dictionary while keeping the CompCPU value low. The most obvious situation, where a big dictionary can help a lot, is an archive containing very similar files of at least a few megabytes each. The dictionary size has to be significantly bigger than any individual file to allow LZMA2 to take full advantage of the similarities between consecutive files. .PP If very high compressor and decompressor memory usage is fine, and the file being compressed is at least several hundred megabytes, it may be useful to use an even bigger dictionary than the 64 MiB that .B "xz \-9" would use: .RS .PP .nf .ft CW xz \-vv \-\-lzma2=dict=192MiB big_foo.tar .ft R .fi .RE .PP Using .B \-vv .RB ( "\-\-verbose \-\-verbose" ) like in the above example can be useful to see the memory requirements of the compressor and decompressor. Remember that using a dictionary bigger than the size of the uncompressed file is waste of memory, so the above command isn't useful for small files. .PP Sometimes the compression time doesn't matter, -but the decompressor memory usage has to be kept low -e.g. to make it possible to decompress the file on -an embedded system. +but the decompressor memory usage has to be kept low, for example, +to make it possible to decompress the file on an embedded system. The following command uses .B \-6e .RB ( "\-6 \-\-extreme" ) as a base and sets the dictionary to only 64\ KiB. The resulting file can be decompressed with XZ Embedded (that's why there is .BR \-\-check=crc32 ) using about 100\ KiB of memory. .RS .PP .nf .ft CW xz \-\-check=crc32 \-\-lzma2=preset=6e,dict=64KiB foo .ft R .fi .RE .PP If you want to squeeze out as many bytes as possible, adjusting the number of literal context bits .RI ( lc ) and number of position bits .RI ( pb ) can sometimes help. Adjusting the number of literal position bits .RI ( lp ) might help too, but usually .I lc and .I pb are more important. -E.g. a source code archive contains mostly US-ASCII text, +For example, a source code archive contains mostly US-ASCII text, so something like the following might give slightly (like 0.1\ %) smaller file than .B "xz \-6e" (try also without .BR lc=4 ): .RS .PP .nf .ft CW xz \-\-lzma2=preset=6e,pb=0,lc=4 source_code.tar .ft R .fi .RE .PP Using another filter together with LZMA2 can improve compression with certain file types. -E.g. to compress a x86-32 or x86-64 shared library +For example, to compress a x86-32 or x86-64 shared library using the x86 BCJ filter: .RS .PP .nf .ft CW xz \-\-x86 \-\-lzma2 libfoo.so .ft R .fi .RE .PP Note that the order of the filter options is significant. If .B \-\-x86 is specified after .BR \-\-lzma2 , .B xz will give an error, because there cannot be any filter after LZMA2, and also because the x86 BCJ filter cannot be used as the last filter in the chain. .PP The Delta filter together with LZMA2 can give good results with bitmap images. It should usually beat PNG, which has a few more advanced filters than simple delta but uses Deflate for the actual compression. .PP The image has to be saved in uncompressed format, -e.g. as uncompressed TIFF. +for example, as uncompressed TIFF. The distance parameter of the Delta filter is set to match the number of bytes per pixel in the image. -E.g. 24-bit RGB bitmap needs +For example, 24-bit RGB bitmap needs .BR dist=3 , and it is also good to pass .B pb=0 to LZMA2 to accommodate the three-byte alignment: .RS .PP .nf .ft CW xz \-\-delta=dist=3 \-\-lzma2=pb=0 foo.tiff .ft R .fi .RE .PP -If multiple images have been put into a single archive (e.g.\& +If multiple images have been put into a single archive (for example, .BR .tar ), the Delta filter will work on that too as long as all images have the same number of bytes per pixel. . .SH "SEE ALSO" .BR xzdec (1), .BR xzdiff (1), .BR xzgrep (1), .BR xzless (1), .BR xzmore (1), .BR gzip (1), .BR bzip2 (1), .BR 7z (1) .PP XZ Utils: .br XZ Embedded: .br LZMA SDK: diff --git a/lib/liblzma/Symbol.map b/lib/liblzma/Symbol.map index f49f29f24987..c3208527764d 100644 --- a/lib/liblzma/Symbol.map +++ b/lib/liblzma/Symbol.map @@ -1,203 +1,199 @@ -/* - * $FreeBSD$ - */ - XZ_5.0 { lzma_alone_decoder; lzma_alone_encoder; lzma_auto_decoder; lzma_block_buffer_bound; lzma_block_buffer_decode; lzma_block_buffer_encode; lzma_block_compressed_size; lzma_block_decoder; lzma_block_encoder; lzma_block_header_decode; lzma_block_header_encode; lzma_block_header_size; lzma_block_total_size; lzma_block_unpadded_size; lzma_check_is_supported; lzma_check_size; lzma_code; lzma_crc32; lzma_crc64; lzma_easy_buffer_encode; lzma_easy_decoder_memusage; lzma_easy_encoder; lzma_easy_encoder_memusage; lzma_end; lzma_filter_decoder_is_supported; lzma_filter_encoder_is_supported; lzma_filter_flags_decode; lzma_filter_flags_encode; lzma_filter_flags_size; lzma_filters_copy; lzma_filters_update; lzma_get_check; lzma_index_append; lzma_index_block_count; lzma_index_buffer_decode; lzma_index_buffer_encode; lzma_index_cat; lzma_index_checks; lzma_index_decoder; lzma_index_dup; lzma_index_encoder; lzma_index_end; lzma_index_file_size; lzma_index_hash_append; lzma_index_hash_decode; lzma_index_hash_end; lzma_index_hash_init; lzma_index_hash_size; lzma_index_init; lzma_index_iter_init; lzma_index_iter_locate; lzma_index_iter_next; lzma_index_iter_rewind; lzma_index_memusage; lzma_index_memused; lzma_index_size; lzma_index_stream_count; lzma_index_stream_flags; lzma_index_stream_padding; lzma_index_stream_size; lzma_index_total_size; lzma_index_uncompressed_size; lzma_lzma_preset; lzma_memlimit_get; lzma_memlimit_set; lzma_memusage; lzma_mf_is_supported; lzma_mode_is_supported; lzma_physmem; lzma_properties_decode; lzma_properties_encode; lzma_properties_size; lzma_raw_buffer_decode; lzma_raw_buffer_encode; lzma_raw_decoder; lzma_raw_decoder_memusage; lzma_raw_encoder; lzma_raw_encoder_memusage; lzma_stream_buffer_bound; lzma_stream_buffer_decode; lzma_stream_buffer_encode; lzma_stream_decoder; lzma_stream_encoder; lzma_stream_flags_compare; lzma_stream_footer_decode; lzma_stream_footer_encode; lzma_stream_header_decode; lzma_stream_header_encode; lzma_version_number; lzma_version_string; lzma_vli_decode; lzma_vli_encode; lzma_vli_size; }; XZ_5.2 { lzma_block_uncomp_encode; - lzma_cputhreads; + lzma_cputhreads; lzma_get_progress; lzma_stream_encoder_mt; lzma_stream_encoder_mt_memusage; }; XZprivate_1.0 { lzma_alloc; lzma_alloc_zero; lzma_alone_decoder_init; lzma_block_buffer_bound64; lzma_block_decoder_init; lzma_block_encoder_init; lzma_bufcpy; lzma_check_finish; lzma_check_init; lzma_check_update; lzma_delta_coder_init; lzma_delta_coder_memusage; lzma_delta_decoder_init; lzma_delta_encoder_init; lzma_delta_props_decode; lzma_delta_props_encode; lzma_easy_preset; lzma_free; lzma_index_encoder_init; lzma_index_padding_size; lzma_index_prealloc; lzma_lz_decoder_init; lzma_lz_decoder_memusage; lzma_lz_decoder_uncompressed; lzma_lz_encoder_init; lzma_lz_encoder_memusage; lzma_lzma2_block_size; lzma_lzma2_decoder_init; lzma_lzma2_decoder_memusage; lzma_lzma2_encoder_init; lzma_lzma2_encoder_memusage; lzma_lzma2_props_decode; lzma_lzma2_props_encode; lzma_lzma_decoder_create; lzma_lzma_decoder_init; lzma_lzma_decoder_memusage; lzma_lzma_decoder_memusage_nocheck; lzma_lzma_encode; lzma_lzma_encoder_create; lzma_lzma_encoder_init; lzma_lzma_encoder_memusage; lzma_lzma_encoder_reset; lzma_lzma_lclppb_decode; lzma_lzma_lclppb_encode; lzma_lzma_optimum_fast; lzma_lzma_optimum_normal; lzma_lzma_props_decode; lzma_lzma_props_encode; lzma_mf_bt2_find; lzma_mf_bt2_skip; lzma_mf_bt3_find; lzma_mf_bt3_skip; lzma_mf_bt4_find; lzma_mf_bt4_skip; lzma_mf_find; lzma_mf_hc3_find; lzma_mf_hc3_skip; lzma_mf_hc4_find; lzma_mf_hc4_skip; lzma_mt_block_size; lzma_next_end; lzma_next_filter_init; lzma_next_filter_update; lzma_outq_end; lzma_outq_get_buf; lzma_outq_init; lzma_outq_is_readable; lzma_outq_memusage; lzma_outq_read; lzma_raw_coder_init; lzma_raw_coder_memusage; lzma_raw_decoder_init; lzma_raw_encoder_init; lzma_simple_arm_decoder_init; lzma_simple_arm_encoder_init; lzma_simple_armthumb_decoder_init; lzma_simple_armthumb_encoder_init; lzma_simple_coder_init; lzma_simple_ia64_decoder_init; lzma_simple_ia64_encoder_init; lzma_simple_powerpc_decoder_init; lzma_simple_powerpc_encoder_init; lzma_simple_props_decode; lzma_simple_props_encode; lzma_simple_props_size; lzma_simple_sparc_decoder_init; lzma_simple_sparc_encoder_init; lzma_simple_x86_decoder_init; lzma_simple_x86_encoder_init; lzma_stream_decoder_init; lzma_strm_init; lzma_tuklib_cpucores; lzma_tuklib_physmem; }; diff --git a/lib/liblzma/config.h b/lib/liblzma/config.h index f674cbf66fdb..467ed68ce57d 100644 --- a/lib/liblzma/config.h +++ b/lib/liblzma/config.h @@ -1,530 +1,588 @@ -/* $FreeBSD$ */ /* config.h. Generated from config.h.in by configure. */ /* config.h.in. Generated from configure.ac by autoheader. */ /* Define if building universal (internal helper macro) */ /* #undef AC_APPLE_UNIVERSAL_BUILD */ /* How many MiB of RAM to assume if the real amount cannot be determined. */ #define ASSUME_RAM 128 /* Define to 1 if translation of program messages to the user's native language is requested. */ /* FreeBSD - disabled intentionally */ /* #undef ENABLE_NLS */ /* Define to 1 if bswap_16 is available. */ /* #undef HAVE_BSWAP_16 */ /* Define to 1 if bswap_32 is available. */ /* #undef HAVE_BSWAP_32 */ /* Define to 1 if bswap_64 is available. */ /* #undef HAVE_BSWAP_64 */ /* Define to 1 if you have the header file. */ /* #undef HAVE_BYTESWAP_H */ /* Define to 1 if Capsicum is available. */ #define HAVE_CAPSICUM 1 /* Define to 1 if the system has the type `CC_SHA256_CTX'. */ /* #undef HAVE_CC_SHA256_CTX */ /* Define to 1 if you have the `CC_SHA256_Init' function. */ /* #undef HAVE_CC_SHA256_INIT */ -/* Define to 1 if you have the Mac OS X function CFLocaleCopyCurrent in the - CoreFoundation framework. */ -/* #undef HAVE_CFLOCALECOPYCURRENT */ - /* Define to 1 if you have the Mac OS X function CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */ /* #undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES */ /* Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in the CoreFoundation framework. */ /* #undef HAVE_CFPREFERENCESCOPYAPPVALUE */ /* Define to 1 if crc32 integrity check is enabled. */ #define HAVE_CHECK_CRC32 1 /* Define to 1 if crc64 integrity check is enabled. */ #define HAVE_CHECK_CRC64 1 /* Define to 1 if sha256 integrity check is enabled. */ #define HAVE_CHECK_SHA256 1 /* Define to 1 if you have the `clock_gettime' function. */ #define HAVE_CLOCK_GETTIME 1 /* Define to 1 if you have the header file. */ /* #undef HAVE_COMMONCRYPTO_COMMONDIGEST_H */ /* Define if the GNU dcgettext() function is already present or preinstalled. */ /* FreeBSD - disabled intentionally */ /* #undef HAVE_DCGETTEXT */ /* Define to 1 if you have the declaration of `CLOCK_MONOTONIC', and to 0 if you don't. */ #define HAVE_DECL_CLOCK_MONOTONIC 1 /* Define to 1 if you have the declaration of `program_invocation_name', and to 0 if you don't. */ #define HAVE_DECL_PROGRAM_INVOCATION_NAME 0 /* Define to 1 if any of HAVE_DECODER_foo have been defined. */ #define HAVE_DECODERS 1 /* Define to 1 if arm decoder is enabled. */ #define HAVE_DECODER_ARM 1 /* Define to 1 if armthumb decoder is enabled. */ #define HAVE_DECODER_ARMTHUMB 1 /* Define to 1 if delta decoder is enabled. */ #define HAVE_DECODER_DELTA 1 /* Define to 1 if ia64 decoder is enabled. */ #define HAVE_DECODER_IA64 1 /* Define to 1 if lzma1 decoder is enabled. */ #define HAVE_DECODER_LZMA1 1 /* Define to 1 if lzma2 decoder is enabled. */ #define HAVE_DECODER_LZMA2 1 /* Define to 1 if powerpc decoder is enabled. */ #define HAVE_DECODER_POWERPC 1 /* Define to 1 if sparc decoder is enabled. */ #define HAVE_DECODER_SPARC 1 /* Define to 1 if x86 decoder is enabled. */ #define HAVE_DECODER_X86 1 /* Define to 1 if you have the header file. */ #define HAVE_DLFCN_H 1 /* Define to 1 if any of HAVE_ENCODER_foo have been defined. */ #define HAVE_ENCODERS 1 /* Define to 1 if arm encoder is enabled. */ #define HAVE_ENCODER_ARM 1 /* Define to 1 if armthumb encoder is enabled. */ #define HAVE_ENCODER_ARMTHUMB 1 /* Define to 1 if delta encoder is enabled. */ #define HAVE_ENCODER_DELTA 1 /* Define to 1 if ia64 encoder is enabled. */ #define HAVE_ENCODER_IA64 1 /* Define to 1 if lzma1 encoder is enabled. */ #define HAVE_ENCODER_LZMA1 1 /* Define to 1 if lzma2 encoder is enabled. */ #define HAVE_ENCODER_LZMA2 1 /* Define to 1 if powerpc encoder is enabled. */ #define HAVE_ENCODER_POWERPC 1 /* Define to 1 if sparc encoder is enabled. */ #define HAVE_ENCODER_SPARC 1 /* Define to 1 if x86 encoder is enabled. */ #define HAVE_ENCODER_X86 1 /* Define to 1 if you have the header file. */ #define HAVE_FCNTL_H 1 /* Define to 1 if you have the `futimens' function. */ #define HAVE_FUTIMENS 1 /* Define to 1 if you have the `futimes' function. */ /* #undef HAVE_FUTIMES */ /* Define to 1 if you have the `futimesat' function. */ /* #undef HAVE_FUTIMESAT */ /* Define to 1 if you have the header file. */ #define HAVE_GETOPT_H 1 /* Define to 1 if you have the `getopt_long' function. */ #define HAVE_GETOPT_LONG 1 /* Define if the GNU gettext() function is already present or preinstalled. */ /* FreeBSD - disabled intentionally */ /* #undef HAVE_GETTEXT */ /* Define if you have the iconv() function and it works. */ #define HAVE_ICONV 1 /* Define to 1 if you have the header file. */ /* FreeBSD - only with clang because the base gcc does not support it */ #if defined(__clang__) && defined(__FreeBSD__) && defined(__amd64__) #define HAVE_IMMINTRIN_H 1 #endif /* Define to 1 if you have the header file. */ #define HAVE_INTTYPES_H 1 /* Define to 1 if you have the header file. */ #define HAVE_LIMITS_H 1 /* Define to 1 if mbrtowc and mbstate_t are properly declared. */ #define HAVE_MBRTOWC 1 -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - /* Define to 1 to enable bt2 match finder. */ #define HAVE_MF_BT2 1 /* Define to 1 to enable bt3 match finder. */ #define HAVE_MF_BT3 1 /* Define to 1 to enable bt4 match finder. */ #define HAVE_MF_BT4 1 /* Define to 1 to enable hc3 match finder. */ #define HAVE_MF_HC3 1 /* Define to 1 to enable hc4 match finder. */ #define HAVE_MF_HC4 1 +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MINIX_CONFIG_H */ + /* Define to 1 if getopt.h declares extern int optreset. */ #define HAVE_OPTRESET 1 /* Define to 1 if you have the `posix_fadvise' function. */ #define HAVE_POSIX_FADVISE 1 /* Define to 1 if you have the `pthread_condattr_setclock' function. */ #define HAVE_PTHREAD_CONDATTR_SETCLOCK 1 /* Have PTHREAD_PRIO_INHERIT. */ #define HAVE_PTHREAD_PRIO_INHERIT 1 /* Define to 1 if you have the `SHA256Init' function. */ /* #undef HAVE_SHA256INIT */ /* Define to 1 if the system has the type `SHA256_CTX'. */ #define HAVE_SHA256_CTX 1 /* Define to 1 if you have the header file. */ #define HAVE_SHA256_H 1 /* Define to 1 if you have the `SHA256_Init' function. */ #define HAVE_SHA256_INIT 1 /* Define to 1 if the system has the type `SHA2_CTX'. */ /* #undef HAVE_SHA2_CTX */ /* Define to 1 if you have the header file. */ /* #undef HAVE_SHA2_H */ /* Define to 1 if optimizing for size. */ /* #undef HAVE_SMALL */ /* Define to 1 if stdbool.h conforms to C99. */ #define HAVE_STDBOOL_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STDINT_H 1 +/* Define to 1 if you have the header file. */ +#define HAVE_STDIO_H 1 + /* Define to 1 if you have the header file. */ #define HAVE_STDLIB_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STRINGS_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STRING_H 1 /* Define to 1 if `st_atimensec' is a member of `struct stat'. */ #define HAVE_STRUCT_STAT_ST_ATIMENSEC 1 /* Define to 1 if `st_atimespec.tv_nsec' is a member of `struct stat'. */ #define HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC 1 /* Define to 1 if `st_atim.st__tim.tv_nsec' is a member of `struct stat'. */ /* #undef HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC */ /* Define to 1 if `st_atim.tv_nsec' is a member of `struct stat'. */ #define HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC 1 /* Define to 1 if `st_uatime' is a member of `struct stat'. */ /* #undef HAVE_STRUCT_STAT_ST_UATIME */ +/* Define to 1 to if GNU/Linux-specific details are wanted for symbol + versioning. This must be used together with liblzma_linux.map. */ +/* #undef HAVE_SYMBOL_VERSIONS_LINUX */ + /* Define to 1 if you have the header file. */ /* #undef HAVE_SYS_BYTEORDER_H */ /* Define to 1 if you have the header file. */ #define HAVE_SYS_CAPSICUM_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_ENDIAN_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_PARAM_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_STAT_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_TIME_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_TYPES_H 1 /* Define to 1 if the system has the type `uintptr_t'. */ #define HAVE_UINTPTR_T 1 /* Define to 1 if you have the header file. */ #define HAVE_UNISTD_H 1 /* Define to 1 if you have the `utime' function. */ /* #undef HAVE_UTIME */ /* Define to 1 if you have the `utimes' function. */ /* #undef HAVE_UTIMES */ /* Define to 1 or 0, depending whether the compiler supports simple visibility declarations. */ #define HAVE_VISIBILITY 1 +/* Define to 1 if you have the header file. */ +#define HAVE_WCHAR_H 1 + /* Define to 1 if you have the `wcwidth' function. */ #define HAVE_WCWIDTH 1 /* Define to 1 if the system has the type `_Bool'. */ #define HAVE__BOOL 1 /* Define to 1 if you have the `_futime' function. */ /* #undef HAVE__FUTIME */ /* Define to 1 if _mm_movemask_epi8 is available. */ #if defined(__FreeBSD__) && defined(__amd64__) #define HAVE__MM_MOVEMASK_EPI8 1 #endif /* Define to 1 if the GNU C extension __builtin_assume_aligned is supported. */ #define HAVE___BUILTIN_ASSUME_ALIGNED 1 /* Define to 1 if the GNU C extensions __builtin_bswap16/32/64 are supported. */ #define HAVE___BUILTIN_BSWAPXX 1 /* Define to the sub-directory where libtool stores uninstalled libraries. */ #define LT_OBJDIR ".libs/" /* Define to 1 when using POSIX threads (pthreads). */ #define MYTHREAD_POSIX 1 /* Define to 1 when using Windows Vista compatible threads. This uses features that are not available on Windows XP. */ /* #undef MYTHREAD_VISTA */ /* Define to 1 when using Windows 95 (and thus XP) compatible threads. This avoids use of features that were added in Windows Vista. */ /* #undef MYTHREAD_WIN95 */ /* Define to 1 to disable debugging code. */ #define NDEBUG 1 /* Name of package */ #define PACKAGE "xz" /* Define to the address where bug reports for this package should be sent. */ #define PACKAGE_BUGREPORT "lasse.collin@tukaani.org" /* Define to the full name of this package. */ #define PACKAGE_NAME "XZ Utils" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "XZ Utils 5.2.5" +#define PACKAGE_STRING "XZ Utils 5.2.8" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "xz" /* Define to the home page for this package. */ #define PACKAGE_URL "https://tukaani.org/xz/" /* Define to the version of this package. */ -#define PACKAGE_VERSION "5.2.5" +#define PACKAGE_VERSION "5.2.8" /* Define to necessary symbol if this constant uses a non-standard name on your system. */ /* #undef PTHREAD_CREATE_JOINABLE */ /* The size of `size_t', as computed by sizeof. */ #define SIZEOF_SIZE_T 8 -/* Define to 1 if you have the ANSI C header files. */ +/* Define to 1 if all of the C90 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ #define STDC_HEADERS 1 /* Define to 1 if the number of available CPU cores can be detected with cpuset(2). */ #define TUKLIB_CPUCORES_CPUSET 1 /* Define to 1 if the number of available CPU cores can be detected with pstat_getdynamic(). */ /* #undef TUKLIB_CPUCORES_PSTAT_GETDYNAMIC */ /* Define to 1 if the number of available CPU cores can be detected with sched_getaffinity() */ /* #undef TUKLIB_CPUCORES_SCHED_GETAFFINITY */ /* Define to 1 if the number of available CPU cores can be detected with sysconf(_SC_NPROCESSORS_ONLN) or sysconf(_SC_NPROC_ONLN). */ /* #undef TUKLIB_CPUCORES_SYSCONF */ /* Define to 1 if the number of available CPU cores can be detected with sysctl(). */ /* #undef TUKLIB_CPUCORES_SYSCTL */ /* Define to 1 if the system supports fast unaligned access to 16-bit and 32-bit integers. */ /* FreeBSD - derive from __NO_STRICT_ALIGNMENT */ /* #undef TUKLIB_FAST_UNALIGNED_ACCESS */ /* Define to 1 if the amount of physical memory can be detected with _system_configuration.physmem. */ /* #undef TUKLIB_PHYSMEM_AIX */ /* Define to 1 if the amount of physical memory can be detected with getinvent_r(). */ /* #undef TUKLIB_PHYSMEM_GETINVENT_R */ /* Define to 1 if the amount of physical memory can be detected with getsysinfo(). */ /* #undef TUKLIB_PHYSMEM_GETSYSINFO */ /* Define to 1 if the amount of physical memory can be detected with pstat_getstatic(). */ /* #undef TUKLIB_PHYSMEM_PSTAT_GETSTATIC */ /* Define to 1 if the amount of physical memory can be detected with sysconf(_SC_PAGESIZE) and sysconf(_SC_PHYS_PAGES). */ #define TUKLIB_PHYSMEM_SYSCONF 1 /* Define to 1 if the amount of physical memory can be detected with sysctl(). */ /* #undef TUKLIB_PHYSMEM_SYSCTL */ /* Define to 1 if the amount of physical memory can be detected with Linux sysinfo(). */ /* #undef TUKLIB_PHYSMEM_SYSINFO */ /* Define to 1 to use unsafe type punning, e.g. char *x = ...; *(int *)x = 123; which violates strict aliasing rules and thus is undefined behavior and might result in broken code. */ /* #undef TUKLIB_USE_UNSAFE_TYPE_PUNNING */ /* Enable extensions on AIX 3, Interix. */ #ifndef _ALL_SOURCE # define _ALL_SOURCE 1 #endif +/* Enable general extensions on macOS. */ +#ifndef _DARWIN_C_SOURCE +# define _DARWIN_C_SOURCE 1 +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# define __EXTENSIONS__ 1 +#endif /* Enable GNU extensions on systems that have them. */ #ifndef _GNU_SOURCE # define _GNU_SOURCE 1 #endif -/* Enable threading extensions on Solaris. */ +/* Enable X/Open compliant socket functions that do not require linking + with -lxnet on HP-UX 11.11. */ +#ifndef _HPUX_ALT_XOPEN_SOCKET_API +# define _HPUX_ALT_XOPEN_SOCKET_API 1 +#endif +/* Identify the host operating system as Minix. + This macro does not affect the system headers' behavior. + A future release of Autoconf may stop defining this macro. */ +#ifndef _MINIX +/* # undef _MINIX */ +#endif +/* Enable general extensions on NetBSD. + Enable NetBSD compatibility extensions on Minix. */ +#ifndef _NETBSD_SOURCE +# define _NETBSD_SOURCE 1 +#endif +/* Enable OpenBSD compatibility extensions on NetBSD. + Oddly enough, this does nothing on OpenBSD. */ +#ifndef _OPENBSD_SOURCE +# define _OPENBSD_SOURCE 1 +#endif +/* Define to 1 if needed for POSIX-compatible behavior. */ +#ifndef _POSIX_SOURCE +/* # undef _POSIX_SOURCE */ +#endif +/* Define to 2 if needed for POSIX-compatible behavior. */ +#ifndef _POSIX_1_SOURCE +/* # undef _POSIX_1_SOURCE */ +#endif +/* Enable POSIX-compatible threading on Solaris. */ #ifndef _POSIX_PTHREAD_SEMANTICS # define _POSIX_PTHREAD_SEMANTICS 1 #endif +/* Enable extensions specified by ISO/IEC TS 18661-5:2014. */ +#ifndef __STDC_WANT_IEC_60559_ATTRIBS_EXT__ +# define __STDC_WANT_IEC_60559_ATTRIBS_EXT__ 1 +#endif +/* Enable extensions specified by ISO/IEC TS 18661-1:2014. */ +#ifndef __STDC_WANT_IEC_60559_BFP_EXT__ +# define __STDC_WANT_IEC_60559_BFP_EXT__ 1 +#endif +/* Enable extensions specified by ISO/IEC TS 18661-2:2015. */ +#ifndef __STDC_WANT_IEC_60559_DFP_EXT__ +# define __STDC_WANT_IEC_60559_DFP_EXT__ 1 +#endif +/* Enable extensions specified by ISO/IEC TS 18661-4:2015. */ +#ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__ +# define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1 +#endif +/* Enable extensions specified by ISO/IEC TS 18661-3:2015. */ +#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__ +# define __STDC_WANT_IEC_60559_TYPES_EXT__ 1 +#endif +/* Enable extensions specified by ISO/IEC TR 24731-2:2010. */ +#ifndef __STDC_WANT_LIB_EXT2__ +# define __STDC_WANT_LIB_EXT2__ 1 +#endif +/* Enable extensions specified by ISO/IEC 24747:2009. */ +#ifndef __STDC_WANT_MATH_SPEC_FUNCS__ +# define __STDC_WANT_MATH_SPEC_FUNCS__ 1 +#endif /* Enable extensions on HP NonStop. */ #ifndef _TANDEM_SOURCE # define _TANDEM_SOURCE 1 #endif -/* Enable general extensions on Solaris. */ -#ifndef __EXTENSIONS__ -# define __EXTENSIONS__ 1 +/* Enable X/Open extensions. Define to 500 only if necessary + to make mbstate_t available. */ +#ifndef _XOPEN_SOURCE +/* # undef _XOPEN_SOURCE */ #endif /* Version number of package */ -#define VERSION "5.2.5" +#define VERSION "5.2.8" /* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most significant byte first (like Motorola and SPARC, unlike Intel). */ #if defined(__FreeBSD__) #include #if defined(__NO_STRICT_ALIGNMENT) #define TUKLIB_FAST_UNALIGNED_ACCESS 1 #endif #include #if _BYTE_ORDER == _BIG_ENDIAN # define WORDS_BIGENDIAN 1 #endif #endif -/* Enable large inode numbers on Mac OS X 10.5. */ -#ifndef _DARWIN_USE_64_BIT_INODE -# define _DARWIN_USE_64_BIT_INODE 1 -#endif - /* Number of bits in a file offset, on hosts where this is settable. */ /* #undef _FILE_OFFSET_BITS */ /* Define for large files, on AIX-style hosts. */ /* #undef _LARGE_FILES */ -/* Define to 1 if on MINIX. */ -/* #undef _MINIX */ - -/* Define to 2 if the system does not provide POSIX.1 features except with - this defined. */ -/* #undef _POSIX_1_SOURCE */ - -/* Define to 1 if you need to in order for `stat' and other things to work. */ -/* #undef _POSIX_SOURCE */ - /* Define for Solaris 2.5.1 so the uint32_t typedef from , , or is not used. If the typedef were allowed, the #define below would cause a syntax error. */ /* #undef _UINT32_T */ /* Define for Solaris 2.5.1 so the uint64_t typedef from , , or is not used. If the typedef were allowed, the #define below would cause a syntax error. */ /* #undef _UINT64_T */ /* Define for Solaris 2.5.1 so the uint8_t typedef from , , or is not used. If the typedef were allowed, the #define below would cause a syntax error. */ /* #undef _UINT8_T */ /* Define to rpl_ if the getopt replacement functions and variables should be used. */ /* #undef __GETOPT_PREFIX */ /* Define to the type of a signed integer type of width exactly 32 bits if such a type exists and the standard includes do not define it. */ /* #undef int32_t */ /* Define to the type of a signed integer type of width exactly 64 bits if such a type exists and the standard includes do not define it. */ /* #undef int64_t */ /* Define to the type of an unsigned integer type of width exactly 16 bits if such a type exists and the standard includes do not define it. */ /* #undef uint16_t */ /* Define to the type of an unsigned integer type of width exactly 32 bits if such a type exists and the standard includes do not define it. */ /* #undef uint32_t */ /* Define to the type of an unsigned integer type of width exactly 64 bits if such a type exists and the standard includes do not define it. */ /* #undef uint64_t */ /* Define to the type of an unsigned integer type of width exactly 8 bits if such a type exists and the standard includes do not define it. */ /* #undef uint8_t */ /* Define to the type of an unsigned integer type wide enough to hold a pointer, if such a type exists, and if the system does not define it. */ /* #undef uintptr_t */