diff --git a/ChangeLog b/ChangeLog index 140106eb3d66..5ee316a61591 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6420 +1,6765 @@ +commit b69900ed0b2f914fc6c0a180dcb522dbe5b80ea7 +Author: Lasse Collin +Date: Sun Jun 30 18:02:27 2013 +0300 + + Man pages: Use similar syntax for synopsis as in xz. + + The man pages of lzmainfo, xzmore, and xzdec had similar + constructs as the man page of xz had before the commit + eb6ca9854b8eb9fbf72497c1cf608d6b19d2d494. Eric S. Raymond + didn't mention these man pages in his bug report, but + it's nice to be consistent. + +commit cf4a1e1879d89be314ef3c064bd2656ea452f87e +Author: Lasse Collin +Date: Sun Jun 30 15:55:09 2013 +0300 + + Update NEWS for 5.0.5. + +commit cb94bb6d1f34e1e93c2d634ea9c3b7dfb3981d05 +Author: Lasse Collin +Date: Sun Jun 30 15:54:38 2013 +0300 + + Bump version and soname for 5.0.5. + +commit b7dee202d5b041ccae028d0c5433b83cecbe9e5d +Author: Lasse Collin +Date: Fri Jun 28 23:56:17 2013 +0300 + + xz: Fix return value type in io_write_buf(). + + It didn't affect the behavior of the code since -1 + becomes true anyway. + +commit 265e7b44d804b47373f10b7da28350db7611cea6 +Author: Lasse Collin +Date: Fri Jun 28 18:46:13 2013 +0300 + + xz: Remove an outdated NetBSD-specific comment. + + Nowadays errno == EFTYPE is documented in open(2). + +commit 78c2f8db902195468b8249c432252a6b281db836 +Author: Lasse Collin +Date: Fri Jun 28 18:09:47 2013 +0300 + + xz: Fix error detection of fcntl(fd, F_SETFL, flags) calls. + + POSIX says that fcntl(fd, F_SETFL, flags) returns -1 on + error and "other than -1" on success. This is how it is + documented e.g. on OpenBSD too. On Linux, success with + F_SETFL is always 0 (at least according to fcntl(2) + from man-pages 3.51). + +commit 91750dff8f2c654ff636f12a2acdffe5492374b3 +Author: Lasse Collin +Date: Fri Jun 28 17:36:47 2013 +0300 + + xz: Fix use of wrong variable in a fcntl() call. 
+ + Due to a wrong variable name, when writing a sparse file + to standard output, *all* file status flags were cleared + (to the extent the operating system allowed it) instead of + only clearing the O_APPEND flag. In practice this worked + fine in the common situations on GNU/Linux, but I didn't + check how it behaved elsewhere. + + The original flags were still restored correctly. I still + changed the code to use a separate boolean variable to + indicate when the flags should be restored instead of + relying on a special value in stdout_flags. + +commit e11888a79a4a77a69afde60445880d44f63d01aa +Author: Lasse Collin +Date: Wed Jun 26 13:30:57 2013 +0300 + + xz: Check the value of lzma_stream_flags.version in --list. + + It is a no-op for now, but if an old xz version is used + together with a newer liblzma that supports something new, + then this check becomes important and will stop the old xz + from trying to parse files that it won't understand. + +commit f39ddd88f3222219ada88998cf30abfdd3e0e96c +Author: Lasse Collin +Date: Wed Jun 26 12:17:00 2013 +0300 + + Build: Require Automake 1.12 and use serial-tests option. + + It should actually still work with Automake 1.10 if + the serial-tests option is removed. Automake 1.13 started + using parallel tests by default and the option to get + the old behavior isn't supported before 1.12. + + At least for now, parallel tests don't improve anything + in XZ Utils but they hide the progress output from + test_compress.sh. + +commit cb84e278027a90e9827a6f4d3bb0b4d4744a2fbb +Author: Lasse Collin +Date: Sun Jun 23 17:36:47 2013 +0300 + + xz: Validate Uncompressed Size from Block Header in list.c. + + This affects only "xz -lvv". Normal decompression with xz + already detected if Block Header and Index had mismatched + Uncompressed Size fields. So this just makes "xz -lvv" + show such files as corrupt instead of showing the + Uncompressed Size from Index. 
+ +commit f01780fce454c7489f7dcbf806299b50da5f51b7 +Author: Lasse Collin +Date: Wed Jun 26 10:58:58 2013 +0300 + + Update THANKS. + +commit d98ede7d700b892e32d9c2f46563b6ebc566786d +Author: Lasse Collin +Date: Fri Jun 21 22:04:45 2013 +0300 + + xz: Make the man page more friendly to doclifter. + + Thanks to Eric S. Raymond. + +commit 19b447b64b3f520cd5b11429000b092f7c76709b +Author: Lasse Collin +Date: Fri Jun 21 21:54:59 2013 +0300 + + xz: A couple of man page fixes. + + Now the interaction of presets and custom filter chains + is described correctly. Earlier it contradicted itself. + + Thanks to DevHC who reported these issues on IRC to me + on 2012-12-14. + +commit 45edf2966fc9a4d2eae8f84b2fa027fb4fa1df8b +Author: Lasse Collin +Date: Fri Jun 21 21:50:26 2013 +0300 + + xz: Fix interaction between preset and custom filter chains. + + There was somewhat illogical behavior when --extreme was + specified and mixed with custom filter chains. + + Before this commit, "xz -9 --lzma2 -e" was equivalent + to "xz --lzma2". After it is equivalent to "xz -6e" + (all earlier preset options get forgotten when a custom + filter chain is specified and the default preset is 6 + to which -e is applied). I find this less illogical. + + This also affects the meaning of "xz -9e --lzma2 -7". + Earlier it was equivalent to "xz -7e" (the -e specified + before a custom filter chain wasn't forgotten). Now it + is "xz -7". Note that "xz -7e" still is the same as "xz -e7". + + Hopefully very few cared about this in the first place, + so pretty much no one should even notice this change. + + Thanks to Conley Moorhous. + +commit b065984e5a9272eb50bc0c6d3731e6199c0ae8a8 +Author: Lasse Collin +Date: Fri Apr 8 17:53:05 2011 +0300 + + xz: Change size_t to uint32_t in a few places. + +commit 32be621f52f2e1686db88baa7b01dc1ae338f426 +Author: Lasse Collin +Date: Sat Apr 27 22:07:46 2013 +0300 + + Build: Use -Wvla with GCC if supported. 
+ + Variable-length arrays are mandatory in C99 but optional in C11. + The code doesn't currently use any VLAs and it shouldn't in the + future either to stay compatible with C11 without requiring any + optional C11 features. + +commit efb07cfba65e9e05984c02cd796c1b0338ce04dc +Author: Lasse Collin +Date: Mon Apr 15 19:29:09 2013 +0300 + + xzdec: Improve the --help message. + + The options are now ordered in the same order as in xz's help + message. + + Descriptions were added to the options that are ignored. + I left them in parenthesis even if it looks a bit weird + because I find it easier to spot the ignored vs. non-ignored + options from the list that way. + +commit e3c8be13699e2813f5e2879d8187444b46d82d89 +Author: Lasse Collin +Date: Fri Apr 5 19:25:40 2013 +0300 + + Update THANKS. + +commit ad8282efe483612f6b5544f9a0d2e4914fb2532a +Author: Jeff Bastian +Date: Wed Apr 3 13:59:17 2013 +0200 + + xzgrep: make the '-h' option to be --no-filename equivalent + + * src/scripts/xzgrep.in: Accept the '-h' option in argument parsing. + +commit 9271a3eb0e022b23e8712154be851d0afe4c02e4 +Author: Lasse Collin +Date: Fri Apr 5 19:34:09 2013 +0300 + + liblzma: Be less picky in lzma_alone_decoder(). + + To avoid false positives when detecting .lzma files, + rare values in dictionary size and uncompressed size fields + were rejected. They will still be rejected if .lzma files + are decoded with lzma_auto_decoder(), but when using + lzma_alone_decoder() directly, such files will now be accepted. + Hopefully this is an OK compromise. + + This doesn't affect xz because xz still has its own file + format detection code. This does affect lzmadec though. + So after this commit lzmadec will accept files that xz or + xz-emulating-lzma doesn't. + + NOTE: lzma_alone_decoder() still won't decode all .lzma files + because liblzma's LZMA decoder doesn't support lc + lp > 4. 
+ + Reported here: + http://sourceforge.net/projects/lzmautils/forums/forum/708858/topic/7068827 + + Conflicts: + src/liblzma/common/alone_decoder.c + src/liblzma/common/alone_decoder.h + +commit 211b931cee58626c1d2e021810cb108cb5cbc10f +Author: Lasse Collin +Date: Tue Mar 5 19:14:50 2013 +0200 + + Avoid unneeded use of awk in xzless. + + Use "read" instead of "awk" in xzless to get the version + number of "less". The need for awk was introduced in + the commit db5c1817fabf7cbb9e4087b1576eb26f0747338e. + + Thanks to Ariel P for the patch. + +commit 9f62fd9605eade23b62b07a235d1f02156f7a5c6 +Author: Jonathan Nieder +Date: Mon Nov 19 00:10:10 2012 -0800 + + xzless: Make "less -V" parsing more robust + + In v4.999.9beta~30 (xzless: Support compressed standard input, + 2009-08-09), xzless learned to parse ‘less -V’ output to figure out + whether less is new enough to handle $LESSOPEN settings starting + with “|-”. That worked well for a while, but the version string from + ‘less’ versions 448 (June, 2012) is misparsed, producing a warning: + + $ xzless /tmp/test.xz; echo $? + /usr/bin/xzless: line 49: test: 456 (GNU regular expressions): \ + integer expression expected + 0 + + More precisely, modern ‘less’ lists the regexp implementation along + with its version number, and xzless passes the entire version number + with attached parenthetical phrase as a number to "test $a -gt $b", + producing the above confusing message. + + $ less-444 -V | head -1 + less 444 + $ less -V | head -1 + less 456 (no regular expressions) + + So relax the pattern matched --- instead of expecting "less ", + look for a line of the form "less [ (extra parenthetical)]". + While at it, improve the behavior when no matching line is found --- + instead of producing a cryptic message, we can fall back on a LESSPIPE + setting that is supported by all versions of ‘less’. + + The implementation uses "awk" for simplicity. Hopefully that’s + portable enough. 
+ + Reported-by: Jörg-Volker Peetz + Signed-off-by: Jonathan Nieder + +commit 1d05980f5b5c2c94d833001daccacce4a466876e +Author: Lasse Collin +Date: Wed Oct 3 15:54:24 2012 +0300 + + xz: Fix the note about --rsyncable on the man page. + +commit fb68497333598688d309a92838d91fd560f7e9f0 +Author: Lasse Collin +Date: Fri Sep 28 20:11:09 2012 +0300 + + xz: Improve handling of failed realloc in xrealloc. + + Thanks to Jim Meyering. + +commit 75013db6d4d63c195bd8b8d45729b4be0665a812 +Author: Lasse Collin +Date: Sat Dec 15 20:01:02 2012 +0200 + + A few typo fixes to comments and the xz man page. + + Thanks to Jim Meyering. + +commit e44b21839b1dcbac5097be39b87dd2ddb6e114fd +Author: Lasse Collin +Date: Thu Aug 2 17:13:30 2012 +0300 + + Build: Bump gettext version requirement to 0.18. + + Otherwise too old version of m4/lib-link.m4 gets included + when autoreconf -fi is run. + +commit fd3dbb23ca7e75a7a888d7e897c381dc06308307 +Author: Lasse Collin +Date: Thu Jul 5 07:36:28 2012 +0300 + + Tests: Remove tests/test_block.c that had gotten committed accidentally. + +commit 05a735d279d74af437c31f25f69aded4713c1a3d +Author: Lasse Collin +Date: Thu Jul 5 07:33:35 2012 +0300 + + Build: Include macosx/build.sh in the distribution. + + It has been in the Git repository since 2010 but probably + few people have seen it since it hasn't been included in + the release tarballs. :-( + +commit 4e6d62793b5e7b87edcc93c7ded072c1ecd94173 +Author: Lasse Collin +Date: Thu Jul 5 07:24:45 2012 +0300 + + Docs: Fix the name LZMA Utils -> XZ Utils in debug/README. + +commit dd95b5e7614baf1f07a1316b5106bd616a9efa79 +Author: Lasse Collin +Date: Thu Jul 5 07:23:17 2012 +0300 + + Include debug/translation.bash in the distribution. + + Also fix the script name mentioned in README. + commit 20778053a07eb90c159c1377ca8dc05a90fd530b Author: Lasse Collin Date: Fri Jun 22 14:36:16 2012 +0300 xz: Update man page date to match the latest update. 
commit 2cefa84af676da37d7e9c466d55d46c67ab00c22 Author: Lasse Collin Date: Fri Jun 22 10:25:43 2012 +0300 Bump version and soname for 5.0.4. commit 433fec191a17e45690809e54146ea7a773f54cff Author: Lasse Collin Date: Fri Jun 22 10:25:09 2012 +0300 Update NEWS for 5.0.4. commit 711fa680f552a4003df73b37e6dc4d6e00b47bcd Author: Lasse Collin Date: Mon Jun 18 21:27:47 2012 +0300 Docs: Language fix to 01_compress_easy.c. Thanks to Jonathan Nieder. commit 3d7ab1dc61a75c560828be5df96598388b771456 Author: Lasse Collin Date: Thu Jun 14 20:15:30 2012 +0300 Fix the top-level Makefile.am for the new example programs. commit ef8b8e5f111469b5bc005975f7abb9abbd372b25 Author: Lasse Collin Date: Thu Jun 14 10:52:33 2012 +0300 Docs: Add new example programs. These have more comments than the old examples and human-readable error messages. More tutorial-like examples are needed but these are a start. commit 75c149bc8045a26f8bc719cb8ed20668dab79091 Author: Lasse Collin Date: Thu Jun 14 10:33:27 2012 +0300 Docs: Move xz_pipe_comp.c and xz_pipe_decomp.c to doc/examples_old. It is good to keep these around so that if someone has copied the decompressor bug from xz_pipe_decomp.c he has an example how to easily fix it. commit 456307ebf947a5f50bd995d617b99c1215572308 Author: Lasse Collin Date: Thu Jun 14 10:33:01 2012 +0300 Docs: Fix a bug in xz_pipe_decomp.c example program. commit 4c310b8a29bc257e6ccbd2310f12f258678f3fef Author: Lasse Collin Date: Thu May 31 15:53:25 2012 +0300 Translations: Update the Italian translation. Thanks to Milo Casagrande. commit ec32b79366dc47a55ea877589df9e8509ba113a7 Author: Lasse Collin Date: Wed May 30 23:15:07 2012 +0300 Translations: Update the French translation. Thanks to Adrien Nader. commit dd06f40e4dd7649525e4f28d890dc238a3aa37e5 Author: Lasse Collin Date: Tue May 29 22:26:27 2012 +0300 Translations: Update the German translation. 
commit c66808d1f55d0149ed57c536cc9b52e9c8b583bc Author: Lasse Collin Date: Tue May 29 22:12:57 2012 +0300 Translations: Update Polish translation. commit 556c22dfed195c1466b298183b850d6c28544900 Author: Lasse Collin Date: Tue May 29 13:10:36 2012 +0300 Preliminary NEWS for 5.0.4. commit dd13b66bf582f49d3aec36e3410ff8541b7506da Author: Lasse Collin Date: Mon May 28 20:42:11 2012 +0300 liblzma: Fix possibility of incorrect LZMA_BUF_ERROR. lzma_code() could incorrectly return LZMA_BUF_ERROR if all of the following was true: - The caller knows how many bytes of output to expect and only provides that much output space. - When the last output bytes are decoded, the caller-provided input buffer ends right before the LZMA2 end of payload marker. So LZMA2 won't provide more output anymore, but it won't know it yet and thus won't return LZMA_STREAM_END yet. - A BCJ filter is in use and it hasn't left any unfiltered bytes in the temp buffer. This can happen with any BCJ filter, but in practice it's more likely with filters other than the x86 BCJ. Another situation where the bug can be triggered happens if the uncompressed size is zero bytes and no output space is provided. In this case the decompression can fail even if the whole input file is given to lzma_code(). A similar bug was fixed in XZ Embedded on 2011-09-19. commit a0223bf796fdaad51a11ad02c4195c694849cc78 Author: Lasse Collin Date: Mon May 28 15:38:32 2012 +0300 Update THANKS. commit 86e57e4bfefe3fd8e13615c41604165bb2359501 Author: Lasse Collin Date: Mon May 28 15:37:43 2012 +0300 xz: Don't show a huge number in -vv when memory limit is disabled. commit 13e44a94da19d1ef14832ff12d3877a6fd2c54c0 Author: Lasse Collin Date: Sun May 27 22:30:17 2012 +0300 xz: Document the "summary" lines of --robot -lvv. This documents only the columns that are in v5.0. The new columns added in the master branch aren't necessarily stable yet. 
commit 2f90345e13ab8fea4de45a4f1caa73ebc63a62e7 Author: Lasse Collin Date: Sun May 27 21:53:20 2012 +0300 xz: Fix output of verbose --robot --list modes. It printed the filename in "filename (x/y)" format which it obviously shouldn't do in robot mode. commit 8d4864f53ffae5d862c691a0b334a6b69bc5366e Author: Lasse Collin Date: Thu May 10 21:15:17 2012 +0300 Update THANKS. commit 35e9c58abb0ce3993da844aaeaa3e7231cd2be8f Author: Lasse Collin Date: Thu May 10 21:14:16 2012 +0300 Docs: Cleanup line wrapping a bit. commit 532b3e4c568a228309b56f95c13435fd078dbe02 Author: Benno Schulenberg Date: Tue Mar 13 22:04:04 2012 +0100 Fix a few typos and add some missing articles in some documents. Also hyphenate several compound adjectives. Signed-off-by: Benno Schulenberg commit afb6ce8c82ffef8f2505a3759da72a733c7b0b8f Author: Lasse Collin Date: Sun Apr 29 11:51:25 2012 +0300 Windows: Update notes about static linking with MSVC. commit 7c3ba2ed5c3c878d4a14ca549b46dbff60c6d565 Author: Lasse Collin Date: Thu Apr 19 15:25:26 2012 +0300 liblzma: Remove outdated comments. commit f55db9c187651094f43881c49db2b2d9ffee6b80 Author: Lasse Collin Date: Thu Apr 19 14:17:52 2012 +0300 DOS: Link against DJGPP's libemu to support FPU emulation. This way xz should work on 386SX and 486SX. Floating point only is needed for verbose output in xz. commit 203edff4c761dbd7cac76ea66e4eed501c23e7a3 Author: Lasse Collin Date: Thu Apr 19 13:58:55 2012 +0300 Docs: Update MINIX 3 information in INSTALL. commit f0a8f95c215628967b7cf9bd9b0a9e4172f50bb4 Author: Lasse Collin Date: Wed Feb 22 14:23:13 2012 +0200 Update THANKS. commit b7ad23fa78646036c0290cd91eada939c9a31526 Author: Lasse Collin Date: Wed Feb 22 14:02:34 2012 +0200 Fix exit status of xzgrep when grepping binary files. When grepping binary files, grep may exit before it has read all the input. In this case, gzip -q returns 2 (eating SIGPIPE), but xz and bzip2 show SIGPIPE as the exit status (e.g. 141). 
This causes wrong exit status when grepping xz- or bzip2-compressed binary files. The fix checks for the special exit status that indicates SIGPIPE. It uses kill -l which should be supported everywhere since it is in both SUSv2 (1997) and POSIX.1-2008. Thanks to James Buren for the bug report. commit 4e19fbb04a0035030406482319e264426459eb24 Author: Lasse Collin Date: Wed Feb 22 12:08:43 2012 +0200 Update THANKS. commit c6fa03a427e3d1320794102cee3ff4f5ae00eb36 Author: Lasse Collin Date: Thu May 24 18:47:52 2012 +0300 Fix compiling with IBM XL C on AIX. commit 7b6ffc98645e1b3b302b6680be0a901d1ebf7358 Author: Lasse Collin Date: Thu May 24 18:37:08 2012 +0300 Build: Upgrade m4/acx_pthread.m4 to the latest version. It was renamed to ax_pthread.m4 in Autoconf Archive. commit bfac2be5023994fcc53de2844e7dd3af61910dc2 Author: Lasse Collin Date: Tue Jan 10 17:13:03 2012 +0200 Tests: Fix a compiler warning with _FORTIFY_SOURCE. Reported here: http://sourceforge.net/projects/lzmautils/forums/forum/708858/topic/4927385 commit df85e156716a4eecb7e2978691f03f729444d998 Author: Lasse Collin Date: Mon Dec 19 21:21:29 2011 +0200 Docs: Explain the stable releases better in README. commit d06d32f108c8278c25c24b2e8666bda7b2ec23b5 Author: Lasse Collin Date: Fri Nov 4 17:57:16 2011 +0200 xz: Fix a typo in a comment. Thanks to Bela Lubkin. commit 636fdcfbf542f1e84db2c4736004d84be4b12c84 Author: Lasse Collin Date: Thu Nov 3 17:08:02 2011 +0200 Update THANKS. commit 55fd02f83ecd6cbd6925a3e8a3d43b8d4ef2a17c Author: Lasse Collin Date: Thu Nov 3 17:07:22 2011 +0200 xz: Fix xz on EBCDIC systems. Thanks to Chris Donawa. commit 4052f36053b931bad847a36aabf1a07d0034e297 Author: Lasse Collin Date: Tue Sep 6 12:03:41 2011 +0300 Build: Fix "make check" on Windows. commit 0f25758459c74c366a73f35d47ee12b75890bb79 Author: Lasse Collin Date: Tue Aug 9 21:19:13 2011 +0300 Update THANKS. 
commit 70f03b51ffcb783646b20de8d97b6986c4280eec Author: Lasse Collin Date: Tue Aug 9 21:16:44 2011 +0300 Workaround unusual SIZE_MAX on SCO OpenServer. commit f138bdf76a70029e8360062a0b227936b83b24c9 Author: Lasse Collin Date: Sat Aug 6 20:37:28 2011 +0300 Run the scripts with the correct shell in test_scripts.sh. The scripts are now made executable in the build tree. This way the scripts can be run like programs in test_scripts.sh. Previously test_scripts.sh always used sh but it's not correct if @POSIX_SHELL@ is set to something else by configure. Thanks to Jonathan Nieder for the patch. commit 2c144a0365c84dbf1b6722466746d42f2563a319 Author: Lasse Collin Date: Sun Jul 31 11:01:47 2011 +0300 Fix exit status of "xzdiff foo.xz bar.xz". xzdiff was clobbering the exit status from diff in a case statement used to analyze the exit statuses from "xz" when its operands were two compressed files. Save and restore diff's exit status to fix this. The bug is inherited from zdiff in GNU gzip and was fixed there on 2009-10-09. Thanks to Jonathan Nieder for the patch and to Peter Pallinger for reporting the bug. commit edf339227a966f24aebe1845fcca9429b8f6e318 Author: Anders F Bjorklund Date: Fri Nov 5 12:56:11 2010 +0100 add build script for macosx universal commit 7fcc6334ea8923550ba6b5347ff99dc8432234b0 Author: Lasse Collin Date: Thu Jun 16 12:15:29 2011 +0300 liblzma: Remove unneeded semicolon. commit 631f4d3ae6adfda84d1a110781d9402c12e16cfc Author: Lasse Collin Date: Sat May 28 16:43:26 2011 +0300 Don't call close(-1) in tuklib_open_stdxxx() on error. Thanks to Jim Meyering. commit c89faf4c9e5970e7f7f8a25521ed9aa62d1a2d9a Author: Lasse Collin Date: Sat May 28 09:47:56 2011 +0300 Translations: Update Italian translation. Thanks to Milo Casagrande. commit 6fe2fc9b6ab5bf6848140823e9536370834f42fb Author: Lasse Collin Date: Sat May 28 08:46:04 2011 +0300 Tests: Add a test file for the bug in the previous commit. 
commit 6c4d4db2bc8d8b682bd927144d37daa2ab21a6d6 Author: Lasse Collin Date: Fri May 27 22:25:44 2011 +0300 xz: Fix error handling in xz -lvv. It could do an invalid free() and read past the end of the uninitialized filters array. commit 844f84fcad9670c543550edf7c7e42630c8f7715 Author: Lasse Collin Date: Fri May 27 22:09:49 2011 +0300 liblzma: Handle allocation failures correctly in lzma_index_init(). Thanks to Jim Meyering. commit 240e8b9791df597063a3b68d04ffcb3aa4f2de6a Author: Lasse Collin Date: Mon May 23 18:30:30 2011 +0300 Build: Set GZIP_ENV=-9n in top-level Makefile.am. commit e32cb264ea72a4459810f30abad70dae5a4fa17d Author: Lasse Collin Date: Sat May 21 16:59:22 2011 +0300 Bump version and soname for 5.0.3. commit 65cff45f8fd1c250491557157cce0f5f38481082 Author: Lasse Collin Date: Sat May 21 16:56:53 2011 +0300 Update NEWS for 5.0.3. commit 316c67ffdae1f811ac95e838d5290a013bff4ca7 Author: Lasse Collin Date: Sat May 21 16:28:44 2011 +0300 Add French translation. It is known that the BCJ filter --help text is only partially translated. commit 1931175eea6d09c0845d6e8e334a7333647f11c0 Author: Lasse Collin Date: Sat May 21 15:12:10 2011 +0300 xz: Translate also the string used to print the program name. French needs a space before a colon, e.g. "xz : foo error". commit 841dc1f891b48b23f84c0f0e0c86c7c4e4bdcdf5 Author: Lasse Collin Date: Tue May 17 12:26:28 2011 +0300 Update THANKS. commit 0f7e2d36240ebf1159d5fb85d8cd7422337a0d3f Author: Lasse Collin Date: Tue May 17 12:21:33 2011 +0300 Update INSTALL with a note about linker problem on OpenSolaris x86. commit 793d857e01e1725f16fc0c8af8172c91a9e38617 Author: Lasse Collin Date: Tue May 17 12:01:37 2011 +0300 Build: Fix initialization of enable_check_* variables in configure.ac. This doesn't matter much in practice since it is unlikely that anyone would have such environment variable names. Thanks to Wim Lewis. 
commit afcff45cee04c5c7d9c333504046ffb63d1418b5 Author: Lasse Collin Date: Tue May 17 11:54:38 2011 +0300 Add underscores to attributes (__attribute((__foo__))). commit 22159c6ba2300a006f2e46ce85ae132e2d2f7d57 Author: Lasse Collin Date: Mon Apr 18 19:35:49 2011 +0300 Update THANKS. commit 5e3499059515033d1ce44b6fb0fa49183c7ac633 Author: Martin Väth Date: Fri Apr 15 04:54:49 2011 -0400 xzgrep: fix typo in $0 parsing Reported-by: Diego Elio Pettenò Signed-off-by: Martin Väth Signed-off-by: Mike Frysinger commit 1125611b9b8d4a209b6a73d2c76e1b39c065972a Author: Lasse Collin Date: Tue Apr 12 11:59:49 2011 +0300 Remove doubled words from documentation and comments. Spot candidates by running these commands: git ls-files |xargs perl -0777 -n \ -e 'while (/\b(then?|[iao]n|i[fst]|but|f?or|at|and|[dt]o)\s+\1\b/gims)' \ -e '{$n=($` =~ tr/\n/\n/ + 1); ($v=$&)=~s/\n/\\n/g; print "$ARGV:$n:$v\n"}' Thanks to Jim Meyering for the original patch. commit 3f8fa53837bae8b44f3addf19923e26401336c3d Author: Lasse Collin Date: Mon Apr 11 21:04:13 2011 +0300 liblzma: Document lzma_easy_(enc|dec)oder_memusage() better too. commit 320d734c20d0776e3eb80f6b5984ddeb494715b5 Author: Lasse Collin Date: Mon Apr 11 20:59:07 2011 +0300 liblzma: Document lzma_raw_(enc|dec)oder_memusage() better. It didn't mention the return value that is used if an error occurs. commit 2ee4edeffc8d9734bf68230df31b20ac6a94c9b5 Author: Lasse Collin Date: Mon Apr 11 13:59:50 2011 +0300 liblzma: Don't create an empty Block in lzma_stream_buffer_encode(). Empty Block was created if the input buffer was empty. Empty Block wastes a few bytes of space, but more importantly it triggers a bug in XZ Utils 5.0.1 and older when trying to decompress such a file. 5.0.1 and older consider such files to be corrupt. I thought that no encoder creates empty Blocks when releasing 5.0.2 but I was wrong. 
commit 73f56fb87d54091d0c4fd22d70e6f042902e3b63 Author: Lasse Collin Date: Mon Apr 11 13:28:40 2011 +0300 liblzma: Fix API docs to mention LZMA_UNSUPPORTED_CHECK. This return value was missing from the API comments of four functions. commit 4ce1cf97a88ae1640a380dd19cbc255d729f966b Author: Lasse Collin Date: Mon Apr 11 13:21:28 2011 +0300 liblzma: Validate encoder arguments better. The biggest problem was that the integrity check type wasn't validated, and e.g. lzma_easy_buffer_encode() would create a corrupt .xz Stream if given an unsupported Check ID. Luckily applications don't usually try to use an unsupported Check ID, so this bug is unlikely to cause many real-world problems. commit 972f05d7a4268dbe42573701f83faa45d03249eb Author: Lasse Collin Date: Sat Apr 9 18:29:30 2011 +0300 Update THANKS. commit 28154eeaf6e3442cd1e174f4e81266d60c4dac60 Author: Lasse Collin Date: Sat Apr 9 18:28:58 2011 +0300 liblzma: Add missing #ifdefs to filter_common.c. Passing --disable-decoders to configure broke a few encoders due to missing #ifdefs in filter_common.c. Thanks to Jason Gorski for the patch. commit aa95516d3d509c6b7895ee519004afcf500a0759 Author: Lasse Collin Date: Sat Apr 2 14:49:56 2011 +0300 liblzma: Fix a memory leak in stream_encoder.c. It leaks old filter options structures (hundred bytes or so) every time the lzma_stream is reinitialized. With the xz tool, this happens when compressing multiple files. commit 58f52c72f49562a08042da9a2f4bbdf4dd162d0c Author: Lasse Collin Date: Fri Apr 1 08:47:46 2011 +0300 Bumped version and liblzma soname to 5.0.2. commit 162779682e01d15f0ce386ef7f40d1be05ad0053 Author: Lasse Collin Date: Fri Apr 1 08:47:20 2011 +0300 Updated NEWS for 5.0.2. commit 45553f9b4b0175c292023010dc41520347004852 Author: Lasse Collin Date: Thu Mar 31 15:06:58 2011 +0300 Update INSTALL with another note about IRIX. 
commit af9d48d5515eadef689b1ce9ffb91e4dbcbc7f35 Author: Lasse Collin Date: Thu Mar 31 12:22:55 2011 +0300 Tests: Add a new file to test empty LZMA2 streams. commit d099ef9f517b59ab8e3b6f6aa0543c3643983470 Author: Lasse Collin Date: Thu Mar 31 11:54:48 2011 +0300 liblzma: Fix decoding of LZMA2 streams having no uncompressed data. The decoder considered empty LZMA2 streams to be corrupt. This shouldn't matter much with .xz files, because no encoder creates empty LZMA2 streams in .xz. This bug is more likely to cause problems in applications that use raw LZMA2 streams. commit df87249b26e79a75fd91041e85512944fc247b57 Author: Lasse Collin Date: Thu Mar 24 01:42:49 2011 +0200 Scripts: Better fix for xzgrep. Now it uses "grep -q". Thanks to Gregory Margo. commit 68c453e1c7b09dc9c7d2ef9d994c46f5b367f5d3 Author: Lasse Collin Date: Thu Mar 24 01:22:18 2011 +0200 Updated THANKS. commit b441d39855516ae618faffd5156261b8b413394f Author: Lasse Collin Date: Thu Mar 24 01:21:32 2011 +0200 Scripts: Fix xzgrep -l. It didn't work at all. It tried to use the -q option for grep, but it appended it after "--". This works around it by redirecting to /dev/null. The downside is that this can be slower with big files compared to proper use of "grep -q". Thanks to Gregory Margo. commit 82d5164839517f55daeadd9ee88c76425db30224 Author: Lasse Collin Date: Fri Feb 4 22:49:31 2011 +0200 xz: Clean up suffix.c. struct suffix_pair isn't needed in compressed_name() so get rid of it there. commit 6decc8b41882c2250f0450eb87b83c9fbf495e95 Author: Lasse Collin Date: Fri Feb 4 11:29:47 2011 +0200 xz: Check if the file already has custom suffix when compressing. Now "xz -S .test foo.test" refuses to compress the file because it already has the suffix .test. The man page had it documented this way already. commit ecda90061df8d39399e707e5c76c2ec0a0f400e5 Author: Lasse Collin Date: Wed Feb 2 23:01:51 2011 +0200 Updated THANKS. 
commit 0fda1ae5b1aa0a5c629a09e5228db8ba1cd0dd5f Author: Lasse Collin Date: Wed Feb 2 23:00:33 2011 +0200 Translations: Add Polish translation. Thanks to Jakub Bogusz. commit 00be32978fedc5038748438bf685ac1713d1db83 Author: Lasse Collin Date: Wed Feb 2 22:24:00 2011 +0200 Updated THANKS. commit 7232fcf96bf4bd5f9cd4fc6c93ca2912c665e004 Author: Lasse Collin Date: Fri Jan 28 20:26:38 2011 +0200 Bump package version and liblzma soname to 5.0.1. commit 5fbce0b8d96dc96775aa0215e3581addc830e23d Author: Lasse Collin Date: Fri Jan 28 20:16:57 2011 +0200 Update NEWS for 5.0.1. commit 03ebd1bbb314f9f204940219a835c883bf442475 Author: Lasse Collin Date: Wed Jan 26 12:19:08 2011 +0200 xz: Fix --force on setuid/setgid/sticky and multi-hardlink files. xz didn't compress setuid/setgid/sticky files and files with multiple hard links even with --force. This bug was introduced in 23ac2c44c3ac76994825adb7f9a8f719f78b5ee4. Thanks to Charles Wilson. commit 7bd0a5e7ccc354f7c2e95c8bc27569c820f6a136 Author: Lasse Collin Date: Tue Jan 18 21:25:24 2011 +0200 Updated THANKS. commit f71c4e16e913f660977526f0ef8d2acdf458d7c9 Author: Lasse Collin Date: Tue Jan 18 21:23:50 2011 +0200 Add alloc_size and malloc attributes to a few functions. Thanks to Cristian Rodríguez for the original patch. commit 316cbe24465143edde8f6ffb7532834b7b2ea93f Author: Lasse Collin Date: Mon Dec 13 16:36:33 2010 +0200 Scripts: Fix gzip and bzip2 support in xzdiff. commit 9311774c493c19deab51ded919dcd2e9c4aa2829 Author: Lasse Collin Date: Sun Dec 12 21:23:55 2010 +0200 Build: Enable ASM on DJGPP by default. commit 4a42aaee282fc73b482581684d65110506d5efdd Author: Lasse Collin Date: Sun Dec 12 16:09:42 2010 +0200 Updated THANKS. commit ce56f63c41ee210e6308090eb6d49221fdf67d6c Author: Lasse Collin Date: Sun Dec 12 16:07:11 2010 +0200 Add missing PRIx32 and PRIx64 compatibility definitions. This fixes portability to systems that lack C99 inttypes.h. Thanks to Juan Manuel Guerrero. 
commit e6baedddcf54e7da049ebc49183565b99facd4c7 Author: Lasse Collin Date: Sun Dec 12 14:50:04 2010 +0200 DOS-like: Treat \ and : as directory separators in addition to /. Juan Manuel Guerrero had fixed this in his XZ Utils port to DOS/DJGPP. The bug affects also Windows and OS/2. commit b7afd3e22a8fac115b75c738d40d3eb1de7e286f Author: Lasse Collin Date: Tue Dec 7 18:52:04 2010 +0200 Translations: Fix Czech translation of "sparse file". Thanks to Petr Hubený and Marek Černocký. commit 3e564704bc6f463cb2db11e3f3f0dbd71d85992e Author: Lasse Collin Date: Mon Nov 15 14:28:26 2010 +0200 liblzma: Document the return value of lzma_lzma_preset(). commit 2964d8d691ed92abdcf214888d79ad6d79774735 Author: Jonathan Nieder Date: Fri Nov 12 15:22:13 2010 -0600 Simplify paths in generated API docs Currently the file list generated by Doxygen has src/ at the beginning of each path. Paths like common/sysdefs.h and liblzma/api/lzma.h are easier to read without such a prefix. Builds from a separate build directory with mkdir build cd build ../configure doxygen Doxyfile include an even longer prefix /home/someone/src/xz/src; this patch has the nice side-effect of eliminating that prefix, too. Fixes: http://bugs.debian.org/572273 commit 37c25658efd25b034266daf87cd381d20d1df776 Author: Lasse Collin Date: Tue Oct 26 15:48:48 2010 +0300 Build: Copy the example programs to $docdir/examples. The example programs by Daniel Mealha Cabrita were included in the git repository, but I had forgot to add them to Makefile.am. Thus, they didn't get included in the source package at all by "make dist". commit e45929260cd902036efd40c5610a8d0a50d5712b Author: Lasse Collin Date: Sat Oct 23 17:25:52 2010 +0300 Build: Fix mydist rule when .git doesn't exist. commit 6e1326fcdf6b6209949be57cfe3ad4b781b65168 Author: Lasse Collin Date: Sat Oct 23 14:15:35 2010 +0300 Add NEWS for 5.0.0. 
commit b667a3ef6338a2c1db7b7706b1f6c99ea392221c Author: Lasse Collin Date: Sat Oct 23 14:02:53 2010 +0300 Bump version to 5.0.0 and liblzma version-info to 5:0:0. commit 8c947e9291691629714dafb4536c718b6cc24fbd Author: Lasse Collin Date: Sat Oct 23 12:30:54 2010 +0300 liblzma: Make lzma_code() check the reserved members in lzma_stream. If any of the reserved members in lzma_stream are non-zero or non-NULL, LZMA_OPTIONS_ERROR is returned. It is possible that a new feature in the future is indicated by just setting a reserved member to some other value, so the old liblzma version need to catch it as an unsupported feature. commit e61d85e082743ebd2dd0ff28fc0a82482ede0538 Author: Lasse Collin Date: Sat Oct 23 12:26:33 2010 +0300 Windows: Use MinGW's stdio functions. The non-standard ones from msvcrt.dll appear to work most of the time with XZ Utils, but there are some corner cases where things may go very wrong. So it's good to use the better replacements provided by MinGW(-w64) runtime. commit 23e23f1dc029146714c9a98313ab3ea93d71a2fc Author: Lasse Collin Date: Sat Oct 23 12:21:32 2010 +0300 liblzma: Use 512 as INDEX_GROUP_SIZE. This lets compiler use shifting instead of 64-bit division. commit 613939fc82603b75b59eee840871a05bc8dd08e0 Author: Lasse Collin Date: Sat Oct 23 12:20:11 2010 +0300 liblzma: A few ABI tweaks to reserve space in structures. commit 68b83f252df3d27480a9f6f03445d16f6506fef1 Author: Lasse Collin Date: Thu Oct 21 23:16:11 2010 +0300 xz: Make sure that message_strm() can never return NULL. commit d09c5753e33ff96ee57edb6d1e98e34041203695 Author: Lasse Collin Date: Thu Oct 21 23:06:31 2010 +0300 liblzma: Update the comments in the API headers. Adding support for LZMA_FINISH for Index encoding and decoding needed tiny additions to the relevant .c files too. commit 33c1c0e102eb529588503b8beea0903a45488fad Author: Lasse Collin Date: Tue Oct 19 12:08:30 2010 +0300 Update INSTALL.generic. 
commit 0076e03641f201c4b77dddd5a6db5880be19a78c Author: Lasse Collin Date: Tue Oct 19 11:44:37 2010 +0300 Clean up a few FIXMEs and TODOs. lzma_chunk_size() was commented out because it is currently useless. commit ce34ec4f54ff8b753da236f371ad8dd23c8135c9 Author: Lasse Collin Date: Tue Oct 19 10:21:08 2010 +0300 Update docs. commit f0fa880d247e73264d2c04fe31fb3412318a0026 Author: Lasse Collin Date: Tue Oct 12 15:13:30 2010 +0300 xz: Avoid raise() also on OpenVMS. This is similar to DOS/DJGPP in that killing the program with a signal will print a backtrace or a similar message. commit ac462b1c47c451f5c62e428306314c4bdad8ae7f Author: Lasse Collin Date: Mon Oct 11 21:26:19 2010 +0300 xz: Avoid SA_RESTART for portability reasons. SA_RESTART is not as portable as I had hoped. It's missing at least from OpenVMS, QNX, and DJGPP. Luckily we can do fine without SA_RESTART. commit d52b411716a614c202e89ba732492efb9916cd3f Author: Lasse Collin Date: Sun Oct 10 17:58:58 2010 +0300 xz: Use "%"PRIu32 instead of "%d" in a format string. commit ae74d1bdeb075c3beefe76e1136c5741804e7e91 Author: Lasse Collin Date: Sun Oct 10 17:43:26 2010 +0300 test_files.sh: Fix the first line. For some reason this prevented running the test only on OS/2 and even on that it broke only recently. Thanks to Elbert Pol. commit d492b80ddd6f9a13419de6d102df7374d8f448e8 Author: Lasse Collin Date: Sun Oct 10 16:49:01 2010 +0300 lzmainfo: Use "%"PRIu32 instead of "%u" for uint32_t. commit 825e859a9054bd91202e5723c41a17e72f63040a Author: Lasse Collin Date: Sun Oct 10 16:47:01 2010 +0300 lzmainfo: Use fileno(stdin) instead of STDIN_FILENO. commit acbc4cdecbeec2a4dfaac04f185ece49b2ff17c8 Author: Lasse Collin Date: Sat Oct 9 23:20:51 2010 +0300 lzmainfo: Use setmode() on DOS-like systems. commit ef364d3abc5647111c5424ea0d83a567e184a23b Author: Lasse Collin Date: Sat Oct 9 21:51:03 2010 +0300 OS/2 and DOS: Be less verbose on signals. 
Calling raise() to kill xz when user has pressed C-c is a bit verbose on OS/2 and DOS/DJGPP. Instead of calling raise(), set only the exit status to 1. commit 5629c4be07b6c67e79842b2569da1cedc9c0d69a Author: Lasse Collin Date: Sat Oct 9 19:28:49 2010 +0300 DOS: Update the Makefile, config.h and README. This is now simpler and builds only xz.exe. commit f25a77e6b9bc48a243ddfbbd755b7960eec7e0ac Author: Lasse Collin Date: Sat Oct 9 18:57:55 2010 +0300 Windows: Put some license info into README-Windows.txt. commit e75100f549f85d231df25c07aa94d63e78e2d668 Author: Lasse Collin Date: Sat Oct 9 18:57:04 2010 +0300 Windows: Fix a diagnostics bug in build.bash. commit efeb998a2b1025df1c1d202cc7d21d866cd1c336 Author: Lasse Collin Date: Sat Oct 9 13:02:15 2010 +0300 lzmainfo: Add Windows resource file. commit 389d418445f1623593dfdbba55d52fbb6d1205f5 Author: Lasse Collin Date: Sat Oct 9 12:57:25 2010 +0300 Add missing public domain notice to lzmadec_w32res.rc. commit 6389c773a4912dd9f111256d74ba1605230a7957 Author: Lasse Collin Date: Sat Oct 9 12:52:12 2010 +0300 Windows: Update common_w32res.rc. commit 71275457ca24c9b01721f5cfc3638cf094daf454 Author: Lasse Collin Date: Sat Oct 9 12:27:08 2010 +0300 Windows: Make build.bash prefer MinGW-w32 over MinGW. This is simply for licensing reasons. The 64-bit version will be built with MinGW-w64 anyway (at least for now), so using it also for 32-bit build allows using the same copyright notice about the MinGW-w64/w32 runtime. Note that using MinGW would require a copyright notice too, because its runtime is not in the public domain either even though MinGW's home page claims that it is public domain. See . commit 3ac35719d8433af937af6491383d4a50e343099b Author: Lasse Collin Date: Sat Oct 9 11:33:21 2010 +0300 Windows: Copy COPYING-Windows.txt (if it exists) to the package. Also, put README-Windows.txt to the doc directory like the other documentation files. 
commit 7b5db576fd7a4a67813b8437a9ccd4dbc94bbaae Author: Lasse Collin Date: Fri Oct 8 21:42:37 2010 +0300 Windows: Fix build.bash again. 630a8beda34af0ac153c8051b1bf01230558e422 wasn't good. commit d3cd7abe85ec7c2f46cf198b15c00d5d119df3dd Author: Lasse Collin Date: Fri Oct 8 16:53:20 2010 +0300 Use LZMA_VERSION_STRING instead of PACKAGE_VERSION. Those are the same thing, and the former makes it a bit easier to build the code with other build systems, because one doesn't need to update the version number into custom config.h. This change affects only lzmainfo. Other tools were already using LZMA_VERSION_STRING. commit 084c60d318f2dbaef4078d9b100b4a373d0c3a7f Author: Lasse Collin Date: Fri Oct 8 15:59:25 2010 +0300 configure.ac: Remove two unused defines. commit 11f51b6714357cb67ec7e56ed9575c199b5581fe Author: Lasse Collin Date: Fri Oct 8 15:32:29 2010 +0300 Make tests accommodate missing xz or xzdec. commit b1c7368f95e93ccdefdd0748e04398c26766f47f Author: Lasse Collin Date: Fri Oct 8 15:25:45 2010 +0300 Build: Add options to disable individual command line tools. commit 630a8beda34af0ac153c8051b1bf01230558e422 Author: Lasse Collin Date: Thu Oct 7 00:44:53 2010 +0300 Windows: Make build.bash work without --enable-dynamic=no. commit f9907503f882a745dce9d84c2968f6c175ba966a Author: Lasse Collin Date: Tue Oct 5 14:13:16 2010 +0300 Build: Remove the static/dynamic tricks. Most distros want xz linked against shared liblzma, so it doesn't help much to require --enable-dynamic for that. Those who want to avoid PIC on x86-32 to get better performance, can still do it e.g. by using --disable-shared to compile xz and then another pass to compile shared liblzma. Part of these static/dynamic tricks were needed for Windows in the past. Nowadays we rely on GCC and binutils to do the right thing with auto-import. If the Autotooled build system needs to support some other toolchain on Windows in the future, this may need some rethinking. 
commit fda4724d8114fccfa31c1839c15479f350c2fb4c Author: Lasse Collin Date: Tue Oct 5 12:18:58 2010 +0300 configure.ac: Silence a warning from Autoconf 2.68. commit 80b5675fa62c87426fe86f8fcd20feeabc4361b9 Author: Lasse Collin Date: Mon Oct 4 19:43:01 2010 +0300 A few more language fixes to the xz man page. Thanks to Jonathan Nieder. commit f9722dbeca4dc4c43cfd15d122dafaac50b0a0bb Author: Lasse Collin Date: Sat Oct 2 12:07:33 2010 +0300 Update the FAQ. commit 61ae593661e8dc402394e84d567ca2044a51572b Author: Lasse Collin Date: Sat Oct 2 11:38:20 2010 +0300 liblzma: Small fixes to comments in the API headers. commit 9166682dc601fd42c1b9510572e3f917d18de504 Author: Lasse Collin Date: Tue Sep 28 11:40:12 2010 +0300 Create the PDF versions of the man pages better. commit 17d3c61edd35de8fa884944fc70d1db86daa5dd8 Author: Lasse Collin Date: Tue Sep 28 10:59:53 2010 +0300 Move version.sh to build-aux. commit 84af9d8770451339a692e9b70f96cf56156a6069 Author: Lasse Collin Date: Tue Sep 28 10:53:02 2010 +0300 Update .gitignore. commit 31575a449ac64c523da3bab8d0c0b522cdc7c780 Author: Lasse Collin Date: Tue Sep 28 01:17:14 2010 +0300 Fix accomodate -> accommodate on the xz man page. commit cec0ddc8ec4ce81685a51998b978e22167e461f9 Author: Lasse Collin Date: Mon Sep 27 23:29:34 2010 +0300 Major man page updates. Lots of content was updated on the xz man page. Technical improvements: - Start a new sentence on a new line. - Use fairly short lines. - Use constant-width font for examples (where supported). - Some minor cleanups. Thanks to Jonathan Nieder for some language fixes. commit 075257ab0416a0603be930082e31a5703e4ba345 Author: Lasse Collin Date: Sun Sep 26 18:10:31 2010 +0300 Fix the preset -3e. depth=0 was missing. commit 2577da9ebdba13fbe99ae5ee8bde35f7ed60f6d1 Author: Lasse Collin Date: Thu Sep 23 14:03:10 2010 +0300 Add translations.bash and translation notes to README. 
translations.bash prints some messages from xz, which hopefully makes it a bit easier to test translations. commit a3c5997c57e5b1a20aae6d1071b584b4f17d0b23 Author: Lasse Collin Date: Fri Sep 17 22:14:30 2010 +0300 xz: Update the Czech translation. Thanks to Marek Černocký. commit a1766af582dc23fddd9da1eeb4b9d61e3eb4c2e6 Author: Lasse Collin Date: Thu Sep 16 23:40:41 2010 +0300 xz: Add Italian translation. Thanks to Milo Casagrande and Lorenzo De Liso. commit 21088018554e2b0e02914205377ceb6e34a090bd Author: Lasse Collin Date: Wed Sep 15 00:34:13 2010 +0300 xz: Edit a translators comment. commit be16e28ece1b492b8f93382b7fa1cc4da23c6ff6 Author: Lasse Collin Date: Tue Sep 14 22:47:14 2010 +0300 xz: Add German translation. Thanks to Andre Noll. commit e23ea74f3240e6b69683f9e69d1716e0f9e9092b Author: Lasse Collin Date: Fri Sep 10 14:30:25 2010 +0300 Updated README. commit 8dad2fd69336985adb9f774fa96dc9c0efcb5a71 Author: Lasse Collin Date: Fri Sep 10 14:30:07 2010 +0300 Updated INSTALL. commit 0b5f07fe3728c27cce416ddc40f7e4803ae96ac2 Author: Lasse Collin Date: Fri Sep 10 14:26:20 2010 +0300 Updated the git repository address in ChangeLog. commit a8760203f93a69bc39fd14520a6e9e7b7d70be06 Author: Lasse Collin Date: Fri Sep 10 14:09:33 2010 +0300 xz: Add a comment to translators about "literal context bits". commit bb0b1004f83cdc4d309e1471c2ecaf9f95ce60c5 Author: Lasse Collin Date: Fri Sep 10 10:30:33 2010 +0300 xz: Multiple fixes. The code assumed that printing numbers with thousand separators and decimal points would always produce only US-ASCII characters. This was used for buffer sizes (with snprintf(), no overflows) and aligning columns of the progress indicator and --list. That assumption was wrong (e.g. LC_ALL=fi_FI.UTF-8 with glibc), so multibyte character support was added in this commit. The old way is used if the operating system doesn't have enough multibyte support (e.g. lacks wcwidth()). The sizes of buffers were increased to accommodate multibyte characters. 
I don't know how big they should be exactly, but they aren't used for anything critical, so it's not too bad. If they still aren't big enough, I hopefully get a bug report. snprintf() takes care of avoiding buffer overflows. Some static buffers were replaced with buffers allocated on stack. double_to_str() was removed. uint64_to_str() and uint64_to_nicestr() now share the static buffer and test for thousand separator support. Integrity check names "None" and "Unknown-N" (2 <= N <= 15) were marked to be translated. I had forgot these, plus they wouldn't have worked correctly anyway before this commit, because printing tables with multibyte strings didn't work. Thanks to Marek Černocký for reporting the bug about misaligned table columns in --list output. commit 639f8e2af33cf8a184d59ba56b6df7c098679d61 Author: Lasse Collin Date: Wed Sep 8 08:49:22 2010 +0300 Update the Czech translation. Thanks to Marek Černocký. commit 41bc9956ebfd7c86777d33676acf34c45e7ca7c7 Author: Lasse Collin Date: Tue Sep 7 12:31:40 2010 +0300 xz: Add a note to translators. commit 77a7746616e555fc08028e883a56d06bf0088b81 Author: Lasse Collin Date: Tue Sep 7 10:42:13 2010 +0300 Fix use of N_() and ngettext(). I had somehow thought that N_() is usually used as shorthand for ngettext(). This also fixes a missing \n from a call to ngettext(). commit e6ad39335842343e622ab51207d1d3cb9caad801 Author: Lasse Collin Date: Mon Sep 6 19:43:12 2010 +0300 Add missing files to POTFILES.in. commit 58f55131820d2e08a1a6beb9ec0ee2378044eb30 Author: Lasse Collin Date: Mon Sep 6 10:16:24 2010 +0300 xz: Improve a comment. commit bcb1b898341f7073f51660d7052d7ed6c5461a66 Author: Lasse Collin Date: Sun Sep 5 21:34:29 2010 +0300 xz: Update the comment about NetBSD in file_io.c. Thanks to Joerg Sonnenberger. commit da014d55972f5addbf6b4360d3d8ed2ef4282170 Author: Lasse Collin Date: Sun Sep 5 21:11:33 2010 +0300 xz: Use an array instead of pointer for stdin_filename. Thanks Joerg Sonnenberger. 
commit 8c7d3d1a0781c296c6b6e2465becaffd2132f7ee Author: Lasse Collin Date: Sun Sep 5 12:16:17 2010 +0300 xz: Hopefully ease translating the messages in list.c. commit ef840950ad99cf2955c754875af0e01acf125079 Author: Lasse Collin Date: Sat Sep 4 23:14:44 2010 +0300 xz: Fix grammar. commit c46afd6edc04ea140db6c59e8486f5707c810c13 Author: Lasse Collin Date: Sat Sep 4 23:12:20 2010 +0300 xz: Use lzma_lzma_preset() to initialize the options structure. commit 8fd3ac046d0b1416a2094fecc456d9e0f4d5d065 Author: Lasse Collin Date: Sat Sep 4 22:16:28 2010 +0300 Don't set lc=4 with --extreme. This should reduce the cases where --extreme makes compression worse. On the other hand, some other files may now benefit slightly less from --extreme. commit 474bac0c33e94aeaca8ada17ab19972b1424bc2b Author: Lasse Collin Date: Sat Sep 4 22:10:32 2010 +0300 xz: Minor improvements to --help and --long-help. commit 373ee26f955617295c5c537b04a153a1969140d2 Author: Jonathan Nieder Date: Fri Sep 3 16:49:15 2010 -0500 Adjust memory limits in test_compress.sh Testing compression at level -4 now requires 48 MiB of free store at compression time and 5 MiB at decompression time. Signed-off-by: Jonathan Nieder commit 2fce9312f36727ea82f3430cc5d3a7d243c5f087 Author: Lasse Collin Date: Fri Sep 3 15:54:40 2010 +0300 xz: Make -vv show also decompressor memory usage. commit b4b1cbcb53624ab832f8b3189c74450dc7ea29b6 Author: Lasse Collin Date: Fri Sep 3 15:13:12 2010 +0300 Tweak the compression presets -0 .. -5. "Extreme" mode might need some further tweaking still. Docs were not updated yet. commit 77fe5954cd3d10fb1837372684cbc133b56b6a87 Author: Lasse Collin Date: Fri Sep 3 12:28:41 2010 +0300 liblzma: Adjust default depth calculation for HC3 and HC4. It was 8 + nice_len / 4, now it is 4 + nice_len / 4. This allows faster settings at lower nice_len values, even though it seems that I won't use automatic depth calculation with HC3 and HC4 in the presets. 
commit fce69059cf901ce8075a78c7607d591f144a3b5a Author: Lasse Collin Date: Fri Sep 3 11:11:25 2010 +0300 xz: Make --help two lines shorter. At least for now, the --help option doesn't list any options that take arguments, so "Mandatory arguments to..." can be omitted. commit a848e47ced6e5e2a564b5c454b2f5a19c2f40298 Author: Lasse Collin Date: Thu Sep 2 19:22:35 2010 +0300 xz: Make setting a preset override a custom filter chain. This is more logical behavior than ignoring preset level options once a custom filter chain has been specified. commit b3ff7ba044eaeab3e424d7b51fe914daf681b1a3 Author: Lasse Collin Date: Thu Sep 2 19:09:57 2010 +0300 xz: Always warn if adjusting dictionary size due to memlimit. commit d5653ba8a1ea9c00de4fddc617aba3c51e18139d Author: Lasse Collin Date: Tue Aug 10 11:04:30 2010 +0300 Fix test_compress.sh. It broke when --memory option was removed from xzdec. Thanks to Jonathan Nieder. commit 792331bdee706aa852a78b171040ebf814c6f3ae Author: Lasse Collin Date: Sat Aug 7 20:45:18 2010 +0300 Disable the memory usage limiter by default. For several people, the limiter causes bigger problems than it solves, so it is better to have it disabled by default. Those who want to have a limiter by default need to enable it via the environment variable XZ_DEFAULTS. Support for environment variable XZ_DEFAULTS was added. It is parsed before XZ_OPT and technically identical with it. The intended uses differ quite a bit though; see the man page. The memory usage limit can now be set separately for compression and decompression using --memlimit-compress and --memlimit-decompress. To set both at once, -M or --memlimit can be used. --memory was retained as a legacy alias for --memlimit for backwards compatibility. The semantics of --info-memory were changed in backwards incompatible way. Compatibility wasn't meaningful due to changes in the memory usage limiter functionality. The memory usage limiter info is no longer shown at the bottom of xz --long-help. 
The memory usage limiter support was removed completely from xzdec. xz's man page was updated to match the above changes. Various unrelated fixes were also made to the man page. commit 4a45dd4c39f75d25c7a37b6400cb24d4010ca801 Author: Lasse Collin Date: Fri Aug 6 20:22:16 2010 +0300 Add missing const to a global constant in xz. commit 01aa4869cb220b7fdad6d1acbabb2233045daa8f Author: Lasse Collin Date: Wed Jul 28 11:44:55 2010 +0300 Language fixes for man pages. Thanks to A. Costa and Jonathan Nieder. commit ce1f0deafe8504e1492bf1b1efb3e3ec950b1a2b Author: Lasse Collin Date: Tue Jul 27 20:47:12 2010 +0300 Windows: Add a note about building a Git repository snapshot commit 507a4a4dea1e5462f12f7ed4b076c34e02054a38 Author: Lasse Collin Date: Tue Jul 27 20:45:03 2010 +0300 Windows: build.sh is a bash script so name it correctly. commit b1cbfd40f049a646a639eb78a3e41e9e3ef73339 Author: Lasse Collin Date: Tue Jul 27 20:27:32 2010 +0300 Windows: Don't strip liblzma.a too much. commit a540198ffb25fad36380c5e92ac20c2d28eec46a Author: Lasse Collin Date: Tue Jul 13 20:07:26 2010 +0300 Updated THANKS. commit bab0f01ed931f606b4675aa9f9331a17cec09bad Author: Lasse Collin Date: Tue Jul 13 19:55:50 2010 +0300 Add two simple example programs. Hopefully these help a bit when learning the basics of liblzma API. I plan to write detailed examples about both basic and advanced features with lots of comments, but these two examples are good to have right now. The examples were written by Daniel Mealha Cabrita. Thanks. commit c15c42abb3c8c6e77c778ef06c97a4a10b8b5d00 Author: Lasse Collin Date: Tue Jun 15 14:06:29 2010 +0300 Add --no-adjust. commit 2130926dd1c839280358172dfadd8d3054bde2b4 Author: Lasse Collin Date: Fri Jun 11 21:51:32 2010 +0300 Updated THANKS. commit bc612d0e0c9e4504c59d49168e87a7ae3e458443 Author: Lasse Collin Date: Fri Jun 11 21:48:32 2010 +0300 Clarify the description of the default memlimit in the man page. Thanks to Denis Excoffier. 
commit e1b6935d60a00405e6b5b455a3426d2248cc926c Author: Lasse Collin Date: Fri Jun 11 21:43:28 2010 +0300 Fix string to uint64_t conversion. Thanks to Denis Excoffier for the bug report. commit 3e49c8acb0f5312948eddb2342dbb5802d4571d0 Author: Lasse Collin Date: Fri Jun 11 10:40:28 2010 +0300 Put the git commit to the filename in mydist rule. commit d8b41eedce486d400f701b757b7b5e4e32276618 Author: Lasse Collin Date: Wed Jun 2 23:13:55 2010 +0300 Fix compiling with -Werror. commit b5fbab6123a39c9a55cd5d7af410e9aae067d5f8 Author: Lasse Collin Date: Wed Jun 2 23:09:22 2010 +0300 Silence a bogus Valgrind warning. When using -O2 with GCC, it liked to swap two comparisons in one "if" statement. It's otherwise fine except that the latter part, which is seemingly never executed, got executed (nothing wrong with that) and then triggered warning in Valgrind about conditional jump depending on uninitialized variable. A few people find this annoying so do things a bit differently to avoid the warning. commit 29a7b250e685852f2f97615493ec49acaf528623 Author: Lasse Collin Date: Wed Jun 2 21:32:12 2010 +0300 Fix a Windows-specific FIXME in signal handling code. commit e89d987056cee7d4e279be3ef3a6cc690bfc0e6d Author: Lasse Collin Date: Wed Jun 2 17:46:58 2010 +0300 Adjust SA_RESTART workaround. I want to get a bug report if something else than DJGPP lacks SA_RESTART. commit e243145c84ab5c3be8259fd486ead0de5235b3f0 Author: Lasse Collin Date: Tue Jun 1 16:02:30 2010 +0300 xz man page updates. - Concatenating .xz files and padding - List mode - Robot mode - A few examples (but many more are needed) commit ce6dc3c0a891f23a862f80ec08d3b6f0beb2a562 Author: Lasse Collin Date: Tue Jun 1 15:51:44 2010 +0300 Major update to xz --list. commit 905e54804a899e4ad526d38fdba7e803ab9b71bd Author: Lasse Collin Date: Tue Jun 1 14:13:03 2010 +0300 Rename message_filters_get() to message_filters_to_str(). 
commit 4b346ae8af20045027ae5efb068c6d69da3324d2 Author: Lasse Collin Date: Tue Jun 1 14:09:12 2010 +0300 Fix a comment. commit 07dc34f6da45c9ab757dad7fd5eef522ad27d296 Author: Lasse Collin Date: Thu May 27 16:17:42 2010 +0300 Fix lzma_block_compressed_size(). commit 44d70cb154225e47eebf15a3cfbdf3794cbb4593 Author: Lasse Collin Date: Thu May 27 14:32:51 2010 +0300 Take Cygwin into account in some #if lines. This change is no-op, but good to have just in case for the future. commit a334348dc02803241cf4e0a539eecdc0e7ad2cc7 Author: Lasse Collin Date: Thu May 27 13:42:44 2010 +0300 Remove references to the Subblock filter in xz and tests. Thanks to Jonathan Nieder. commit 70e5e2f6a7084e6af909deee88ceac2f6efa7893 Author: Lasse Collin Date: Thu May 27 13:35:36 2010 +0300 Remove unused chunk_size.c. Thanks to Jonathan Nieder for the reminder. commit 01a414eaf4be6352c06b48001b041b47e8202faa Author: Jonathan Nieder Date: Thu May 27 02:31:33 2010 -0500 Use my_min() instead of MIN() in src/xz/list.c This should have been done in 920a69a8d8e4203c5edddd829d932130eac188ea. commit 920a69a8d8e4203c5edddd829d932130eac188ea Author: Lasse Collin Date: Wed May 26 10:36:46 2010 +0300 Rename MIN() and MAX() to my_min() and my_max(). This should avoid some minor portability issues. commit 019ae27c24d0c694545a6a46f8b9fb552198b015 Author: Lasse Collin Date: Wed May 26 10:30:20 2010 +0300 Fix compilation of debug/known_sizes.c. commit 98a4856a6ea84f79c790057a6eb89a25bc45b074 Author: Lasse Collin Date: Wed May 26 10:28:54 2010 +0300 Remove references to Subblock filter in debug/sync_flush.c. commit 703d2c33c095c41ae0693ee8c27c45e3847e4535 Author: Lasse Collin Date: Wed May 26 10:16:57 2010 +0300 Better #error message. commit d8a55c48b39703dd83f11089ad01e1ff2ac102e0 Author: Lasse Collin Date: Wed May 26 09:55:47 2010 +0300 Remove the Subblock filter code for now. The spec isn't finished and the code didn't compile anymore. It won't be included in XZ Utils 5.0.0. 
It's easy to get it back once the spec is done. commit b6377fc990f9b8651149cae0fecb8b9c5904e26d Author: Lasse Collin Date: Sun May 16 18:42:22 2010 +0300 Split message_filters(). message_filters_to_str() converts the filter chain to a string. message_filters_show() replaces the original message_filters(). uint32_to_optstr() was also added to show the dictionary size in nicer format when possible. commit d9986db782d6cf0f314342127280519339378fa0 Author: Lasse Collin Date: Fri May 14 23:17:20 2010 +0300 Omit lzma_restrict from the API headers. It isn't really useful so omitting it makes things shorter and slightly more readable. commit 0d3489efca0a723dca0394809fa3e6170843af4b Author: Lasse Collin Date: Mon May 10 19:57:24 2010 +0300 Updated INSTALL. commit 3fb3d594a2b53886adee161b6261e92277f05f7c Author: Lasse Collin Date: Mon May 10 19:54:52 2010 +0300 Updated THANKS. commit 6548e304657e77d3a972053db3c41c5daf591113 Author: Lasse Collin Date: Mon May 10 19:54:15 2010 +0300 Updates to tuklib_physmem and tuklib_cpucores. Don't use #error to generate compile error, because some compilers actually don't take it as an error. This fixes tuklib_physmem on IRIX. Fix incorrect error check for sysconf() return values. Add AIX, HP-UX, and Tru64 specific code to detect the amount of RAM. Add HP-UX specific code to detect the number of CPU cores. Thanks a lot to Peter O'Gorman for initial patches, testing, and debugging these fixes. commit a290cfee3e23f046889c022aa96b4eca2016fdda Author: Lasse Collin Date: Mon Apr 12 21:55:56 2010 +0300 Show both elapsed time and estimated remaining time in xz -v. The extra space for showing both has been taken from the sizes field. If the sizes grow big, bigger units than MiB will be used. It makes it slightly difficult to see that progress is still happening with huge files, but it should be OK in practice. Thanks to Trent W. Buck and Jonathan Nieder for suggestions how to fix it. 
commit a1f7a986b8d708f9290da9799ca1b8d7082fad3e Author: Lasse Collin Date: Wed Mar 31 16:47:25 2010 +0300 Add a simple tip to faq.txt about tar and xz. Thanks to Gilles Espinasse. commit c737eec91d200d730aa82662affd6b06ebb0bff0 Author: Lasse Collin Date: Mon Mar 22 21:03:03 2010 +0200 Updated THANKS. commit f4b2b52624b802c786e4e2a8eb6895794dd93b24 Author: Lasse Collin Date: Sun Mar 7 19:52:25 2010 +0200 Fix xzgrep to not break if filenames have spaces or quotes. Thanks to someone who reported the bug on IRC. commit cf38da00a140bd3bd65b192390ae5553380fd774 Author: Lasse Collin Date: Sun Mar 7 13:59:32 2010 +0200 Treat all integer multiplier suffixes as base-2. Originally both base-2 and base-10 were supported, but since there seems to be little need for base-10 in XZ Utils, treat everything as base-2 and also be more relaxed about the case of the first letter of the suffix. Now xz will accept e.g. KiB, Ki, k, K, kB, and KB, and interpret them all as 1024. The recommended spelling of the suffixes are still KiB, MiB, and GiB. commit 00fc1211ae7b687ac912098f4479112059deccbd Author: Lasse Collin Date: Sun Mar 7 13:50:23 2010 +0200 Consistently round up the memory usage limit in messages. It still feels a bit wrong to round 1 byte to 1 MiB but at least it is now done consistently so that the same byte value is always rounded the same way to MiB. commit 9886d436ff5615fc70eef32ff757b1e934069621 Author: Lasse Collin Date: Sun Mar 7 13:34:34 2010 +0200 Change the default of --enable-assume-ram from 32 to 128 MiB. This is to allow files created with "xz -9" to be decompressed if the amount of RAM cannot be determined. commit 2672bcc9f85ba28ff648e092e9eb4cd9e69ce418 Author: Lasse Collin Date: Sun Mar 7 13:29:28 2010 +0200 Increase the default memory usage limit on "low-memory" systems. Previously the default limit was always 40 % of RAM. The new limit is a little bit more complex: - If 40 % of RAM is at least 80 MiB, 40 % of RAM is used as the limit. 
- If 80 % of RAM is over 80 MiB, 80 MiB is used as the limit. - Otherwise 80 % of RAM is used as the limit. This should make it possible to decompress files created with "xz -9" on more systems. Swapping is generally more expected on systems with less RAM, so higher default limit on them shouldn't cause too bad surprises in terms of heavy swapping. Instead, the higher default limit should reduce the number of bad surprises when it used to prevent decompression of files created with "xz -9". The DoS prevention system shouldn't be a DoS itself. Note that even with the new default limit, a system with 64 MiB RAM cannot decompress files created with "xz -9" without user overriding the limit. This should be OK, because if xz is going to need more memory than the system has RAM, it will run very very slowly and thus it's good that user has to override the limit in that case. commit 5527b7269a997e7f335d60f237a64bbf225d9dc7 Author: Lasse Collin Date: Sat Mar 6 21:36:19 2010 +0200 Updated THANKS. commit d0d1c51aea4351288a7e533cce28cb7f852f6b05 Author: Lasse Collin Date: Sat Mar 6 21:17:20 2010 +0200 Fix missing initialization in lzma_strm_init(). With bad luck, lzma_code() could return LZMA_BUF_ERROR when it shouldn't. This has been here since the early days of liblzma. It got triggered by the modifications made to the xz tool in commit 18c10c30d2833f394cd7bce0e6a821044b15832f but only when decompressing .lzma files. Somehow I managed to miss testing that with Valgrind earlier. This fixes . Thanks to Rafał Mużyło for helping to debug it on IRC. commit eb7d51a3faf9298c0c7aa9aaeae1023dcf9e37ea Author: Lasse Collin Date: Fri Feb 12 13:16:15 2010 +0200 Collection of language fixes to comments and docs. Thanks to Jonathan Nieder. commit 4785f2021aa6a23f1caf724fcc823e562584f225 Author: Lasse Collin Date: Fri Feb 12 12:41:20 2010 +0200 Fix jl -> jb in ASM files. 
commit 6b50c9429bf85521d355adc61745d06ee017f8c8 Author: Lasse Collin Date: Fri Feb 12 12:31:22 2010 +0200 Use __APPLE__ instead of __MACH__ in ASM files. This allows the files to work on HURD. Thanks to Jonathan Nieder. commit 6503fde658a5cdbdd907a788865470dd64771601 Author: Lasse Collin Date: Sun Feb 7 19:48:06 2010 +0200 Subtle change to liblzma Block handling API. lzma_block.version has to be initialized even for lzma_block_header_decode(). This way a future version of liblzma won't allocate memory in a way that an old application doesn't know how to free it. The subtlety of this change is that all current apps using lzma_block_header_decode() will keep working for now, because the only possible version value is zero, and lzma_block_header_decode() unconditionally sets the version to zero even now. Unless fixed, these apps will break in the future if a new version of the Block options is ever needed. commit dd7c3841ff78cb94ce02b0220c6e4748460970f7 Author: Lasse Collin Date: Tue Feb 2 11:50:11 2010 +0200 Fix wrong assertion. This was added in 455e68c030fde8a8c2f5e254c3b3ab9489bf3735. commit 9d67588c1597849504a3e5ac8bf6f06e7d2ee8be Author: Lasse Collin Date: Mon Feb 1 22:48:42 2010 +0200 Updated TODO. commit fef6333f52c8801308c3b78acb7942988541d137 Author: Lasse Collin Date: Mon Feb 1 22:47:54 2010 +0200 Fix typos in comments. commit 455e68c030fde8a8c2f5e254c3b3ab9489bf3735 Author: Lasse Collin Date: Mon Feb 1 22:46:56 2010 +0200 Fix signal handling for --list. commit 82220a149015616f75641ee8bbea415137535b9b Author: Lasse Collin Date: Mon Feb 1 11:44:45 2010 +0200 Fix compression of symlinks with --force. xz --force accepted symlinks, but didn't remove them after successful compression. Instead, an error message was displayed. commit d4da177d5ba3d2ef7323a6f1e06ca16e0478810e Author: Lasse Collin Date: Mon Feb 1 10:20:57 2010 +0200 Fix a comment. commit f9dd797a423a148903cf345b4146cb1fe1eab11d Author: Lasse Collin Date: Sun Jan 31 23:43:54 2010 +0200 Updated THANKS. 
commit ee5ddb8b28419fe4923ded5c18a50570a762dcab Author: Lasse Collin Date: Sun Jan 31 23:41:29 2010 +0200 Updated TODO. commit 11936ad3f5a2e97bda3463c7a56a2f4bb9265ea6 Author: Lasse Collin Date: Sun Jan 31 23:35:04 2010 +0200 Mention TODO in README. commit 2901a8e7e82af05675b8cd8758a8ceddb111359f Author: Lasse Collin Date: Sun Jan 31 23:31:14 2010 +0200 Updated INSTALL. commit 8884e16864ba53fb4b58623d7537d7ef30c28e11 Author: Lasse Collin Date: Sun Jan 31 23:28:51 2010 +0200 Revise the Windows build files. The old Makefile + config.h was deleted, because it becomes outdated too easily and building with the Autotools based build system works fine even on Windows. windows/build.sh hasn't got much testing, but it should work to build 32-bit x86 and x86-64 versions of XZ Utils using MSYS, MinGW or MinGW-w32, and MinGW-w64. windows/INSTALL-Windows.txt describes what packages are needed and how to install them. windows/README-Windows.txt is a readme file for the binary package that build.sh hopefully builds. There are no instructions about using Autotools for now, so those using a git snapshot may want to run "autoreconf -fi && ./configure && make mydist" on a UN*X box and then copy the resulting .tar.gz to a Windows. commit 34eb5e201d62f7f46bbe6fe97cfe08cb31b3b88c Author: Lasse Collin Date: Sun Jan 31 19:52:38 2010 +0200 Select the default integrity check type at runtime. Previously it was set statically to CRC64 or CRC32 depending on options passed to the configure script. commit 96a4f840e3b9ca5c81e5711ff9c267b194f93ef1 Author: Lasse Collin Date: Sun Jan 31 18:17:50 2010 +0200 Improve displaying of the memory usage limit. commit b3cc4d8edd68a0250cc69680c99b9f7343f99cf2 Author: Lasse Collin Date: Sun Jan 31 12:53:56 2010 +0200 Don't use uninitialized sigset_t. If signal handlers haven't been established, then it's useless to try to block them, especially since the sigset_t used for blocking hasn't been initialized yet. 
commit 231c3c7098f1099a56abb8afece76fc9b8699f05 Author: Lasse Collin Date: Sun Jan 31 12:01:54 2010 +0200 Delay opening the destination file and other fixes. The opening of the destination file is now delayed a little. The coder is initialized, and if decompressing, the memory usage of the first Block is compared against the memory usage limit before the destination file is opened. This means that if --force was used, the old "target" file won't be deleted so easily when something goes wrong very early. Thanks to Mark K for the bug report. The above fix required some changes to progress message handling. Now there is a separate function for setting and printing the filename. It is used also in list.c. list_file() now handles stdin correctly (gives an error). A useless check for user_abort was removed from file_io.c. commit 0dbd0641db99d5e73d51d04ce7a71e52dc6b4105 Author: Lasse Collin Date: Fri Jan 29 22:48:04 2010 +0200 Add list.h to src/xz/Makefile.am. This should have been already in 0bc9eab243dee3be764b3530433a7fcdc3f7c6a1. commit b4b1a56e0cbd597157858264f5c7189201ac9018 Author: Lasse Collin Date: Fri Jan 29 13:24:27 2010 +0200 Add lzmainfo.1 to manfiles list to convert to .txt and .pdf. commit 5574d64e03ad3a3d6e00e4b0d3e81c7b5529ec95 Author: Lasse Collin Date: Wed Jan 27 16:42:11 2010 +0200 Silence two compiler warnings on DOS-like systems. commit b063cc34a30a4edf109343ff373b2b62b8ca72d3 Author: Lasse Collin Date: Wed Jan 27 13:31:03 2010 +0200 Use PACKAGE_URL instead of custom PACKAGE_HOMEPAGE. commit 38b8035b5cb5f56457c5fa5a891d6900fcf5984f Author: Lasse Collin Date: Tue Jan 26 23:37:46 2010 +0200 Add a missing space to an error message. Thanks to Robert Readman. commit e5496f9628ff5979392a80421d0b63a4de8015b4 Author: Lasse Collin Date: Tue Jan 26 22:53:37 2010 +0200 Use past tense in error message in io_unlink(). Added a note to translators too. Thanks to Robert Readman.
commit d9a9800597ea540090e434132c3b511217df0a2b Author: Lasse Collin Date: Tue Jan 26 15:42:24 2010 +0200 Fix too small static buffer in util.c. This was introduced in 0dd6d007669b946543ca939a44243833c79e08f4 two days ago. commit d0b4bbf5da068503c099cd456e294d7673548cc0 Author: Lasse Collin Date: Tue Jan 26 14:46:43 2010 +0200 Minor comment fix. commit 0bc9eab243dee3be764b3530433a7fcdc3f7c6a1 Author: Lasse Collin Date: Sun Jan 24 23:50:54 2010 +0200 Add initial version of xz --list. This is a bit rough but should be useful for basic things. Ideas (with detailed examples) about the output format are welcome. The output of --robot --list is not necessarily stable yet, although I don't currently have any plans about changing it. The man page hasn't been updated yet. commit df254ce03be016e217b511e7acd5d493f9929ca5 Author: Lasse Collin Date: Sun Jan 24 22:46:11 2010 +0200 Add io_pread(). It will be used by --list. commit ef68dd4a92976276304de2aedfbe34ae91a86abb Author: Lasse Collin Date: Sun Jan 24 22:45:14 2010 +0200 Set LC_NUMERIC=C when --robot is used. It is to ensure that floating point numbers will always have a dot as the decimal separator. commit 0dd6d007669b946543ca939a44243833c79e08f4 Author: Lasse Collin Date: Sun Jan 24 16:57:40 2010 +0200 Some improvements to printing sizes in xz. commit 2a98fdffd68c66371279c211c29153c808ad5c1d Author: Lasse Collin Date: Wed Jan 20 22:02:35 2010 +0200 Fix a typo in README. Thanks to R. Bijker. commit 07a11dad44e041b01dcfc73e8d4e00731158c06d Author: Lasse Collin Date: Sun Jan 17 11:59:54 2010 +0200 Updated windows/Makefile. Thanks to Dan Shechter for the patch. It is likely that windows/Makefile will be removed completely, because Autotools based build nowadays works well with both 32-bit and 64-bit MinGW (I just need to update the docs). commit 37f31ead9d2b4e467df11450cf29ed7d7e3e25f3 Author: Lasse Collin Date: Fri Jan 15 11:05:11 2010 +0200 Update the xz man page to match the previous two commits.
commit 3ffd5d81a43210c8da56da5c5b3637d3f8bc63c7 Author: Lasse Collin Date: Wed Jan 13 19:10:25 2010 +0200 Don't read compressed data from a terminal or write it to a terminal even if --force is specified. It just seems more reasonable this way. The new behavior matches bzip2. The old one matched gzip. commit 23ac2c44c3ac76994825adb7f9a8f719f78b5ee4 Author: Lasse Collin Date: Wed Jan 13 18:12:40 2010 +0200 Don't compress or decompress special files unless writing to stdout even if --force is used. --force will still enable compression of symlinks, but only in case they point to a regular file. The new way simply seems more reasonable. It matches gzip's behavior while the old one matched bzip2's behavior. commit cee12aa852ec0902983dc1f153346ef750157fb9 Author: Lasse Collin Date: Tue Jan 12 16:30:33 2010 +0200 Updated THANKS. commit 153c7740c54b3c90129dbd3d6153ac1303c4d605 Author: Lasse Collin Date: Tue Jan 12 16:18:14 2010 +0200 Add IRIX-specific code to tuklib_physmem and tuklib_cpucores. This is untested but it will get tested soon and, if needed, fixed before 5.0.0. Thanks to Stuart Shelton. commit 8ea8dc754a7a5bc2d60db1eac201839cabdab6a1 Author: Lasse Collin Date: Fri Jan 1 00:29:10 2010 +0200 Fix _memconfig() functions. This affects lzma_memusage() and lzma_memlimit_get(). commit 1a7ec87c8ee61dfc2e496d2e1fb7ab0939804691 Author: Lasse Collin Date: Thu Dec 31 22:45:53 2009 +0200 Revised the Index handling code. This breaks API and ABI but most apps are not affected since most apps don't use this part of the API. You will get a compile error if you are using anything that got broken. Summary of changes: - Ability to store Stream Flags, which are needed for random-access reading in multi-Stream files. - Separate function to set size of Stream Padding. - Iterator structure makes it possible to read the same lzma_index from multiple threads at the same time. - A lot faster code to locate Blocks. - Removed lzma_index_equal() without adding anything to replace it. 
I don't know what it should do exactly with the new features and what actually needs this function in the first place other than test_index.c, which now has its own code to compare lzma_indexes. commit f29997a846e673cb3b8cbd57de47ed313b3978bb Author: Lasse Collin Date: Thu Dec 31 21:13:25 2009 +0200 Remove c-format tag in cs.po. It was fixed in the C code earlier. commit 097bad000363e0bf29f8274ad2d7ab59f7dbf644 Author: Lasse Collin Date: Thu Dec 31 21:11:05 2009 +0200 Add missing lzma_nothrow in filter.h. commit b56cb1fc31fa2381f92eefc040df85667048d626 Author: Lasse Collin Date: Wed Dec 9 18:13:44 2009 +0200 Remove redefinition of _(msgid) macro from lzmainfo.c. commit 171b03febfe09d9fae6ac8be6aa4518bcaf427d2 Author: Jonathan Nieder Date: Tue Dec 8 19:41:57 2009 -0600 update po/.gitignore Since the *.gmo files are deleted by the maintainer-clean target, I assume they are not meant to be tracked. Also add the other files listed in the Makefile’s clean targets (stamp-poT, xz.po, xz.[12].po, *.new.po, xz.mo) to make sure they are not accidentally tracked. Most of these are intermediate files that would not appear unless a build is interrupted or fails. Split the list of untracked files by origin to make it easier to tell if files are missing in the future. Signed-off-by: Jonathan Nieder commit f7e44c6c11f630519072971b8b07a5729c096c36 Author: Lasse Collin Date: Wed Dec 9 00:38:55 2009 +0200 Always rely on GCC's auto-import on Windows. I understood that this is nicer, because then people don't need to worry about the LZMA_API_STATIC macro. Thanks to Charles Wilson and Keith Marshall. commit 7b76a3e2336f25088957cba92b0dbd854d9caa3c Author: Lasse Collin Date: Mon Dec 7 21:46:53 2009 +0200 Fix file_io.c on DOS-like systems. The problem was introduced when adding sparse file support in 465d1b0d6518c5d980f2db4c2d769f9905bdd902. Thanks to Charles Wilson. 
commit 0696f5d268362221380e039bad48a86e29067c6a Author: Lasse Collin Date: Mon Dec 7 20:54:21 2009 +0200 Add Czech translation. Thanks to Marek Černocký. Other people planning to translate xz: Note that the messages are a little bit in flux still. Translations are still welcome, just be prepared to some extra work in case there are changes. commit 5e817a50d276f0a3607638c1c1d449d50b9aa4e5 Author: Lasse Collin Date: Mon Dec 7 20:32:08 2009 +0200 Add a note for translators to add a bug reporting address for translation bugs. commit 6db1c35be9e1e364cdacff6878910e1b7aac2a37 Author: Lasse Collin Date: Mon Dec 7 20:07:02 2009 +0200 Prevent xgettext from taking one regular string as a C format string. Thanks to Marek Černocký. commit e0c2776b6ffbd2b1900fde353aceac734edc93d7 Author: Lasse Collin Date: Sat Nov 28 17:45:22 2009 +0200 Remove duplicate code in io_open_dest(). Fix a missing _() in the error message too. commit f057a33c6f7c5992389479f2d4feabf2900ba7ee Author: Lasse Collin Date: Thu Nov 26 10:11:23 2009 +0200 Typo fix to sysdefs.h. Thanks to Jonathan Nieder. commit 8767b41534eafdf5e742e12190646bf5740b0cdb Author: Lasse Collin Date: Thu Nov 26 10:10:36 2009 +0200 Fix a memory leak in test_index.c. This was introduced in bd13b04e202b6f495a68eb0766f97085b7c50a06. Thanks to Jim Meyering for noticing it. commit 919fbaff860acdaa4bcd216500a0b1c960a6db92 Author: Lasse Collin Date: Wed Nov 25 14:22:19 2009 +0200 Add missing error check to coder.c. With bad luck this could cause a segfault due to reading (but not writing) past the end of the buffer. commit bd13b04e202b6f495a68eb0766f97085b7c50a06 Author: Lasse Collin Date: Wed Nov 25 13:04:10 2009 +0200 Fix bugs in lzma_index_read() and lzma_index_cat(). lzma_index_read() didn't skip over Stream Padding if it was the first record in the Index. lzma_index_cat() didn't combine small Indexes correctly. The test suite was updated to check for these bugs. 
These bugs didn't affect the xz command line tool or most users of liblzma in any way. commit 1f196909143b888e062bd9a0c4ba8c34d3019bfa Author: Lasse Collin Date: Wed Nov 25 12:52:56 2009 +0200 Index decoder fixes. The Index decoder code didn't perfectly match the API docs, which said that *i will be set to point to the decoded Index only after decoding has succeeded. The docs were a bit unclear too. Now the decoder will initially set *i to NULL. *i will be set to point to the decoded Index once decoding has succeeded. This simplifies applications too, since it avoids dangling pointers. commit 465d1b0d6518c5d980f2db4c2d769f9905bdd902 Author: Lasse Collin Date: Wed Nov 25 11:19:20 2009 +0200 Create sparse files by default when decompressing into a regular file. Sparse file creation can be disabled with --no-sparse. I don't promise yet that the name of this option won't change before 5.0.0. It's possible that the code, that checks when it is safe to use sparse output on stdout, is not good enough, and a more flexible command line option is needed to configure sparse file handling. commit 37de544414fc2dc5039471d1002ebd015eb3e627 Author: Lasse Collin Date: Sun Nov 22 12:43:06 2009 +0200 Updated THANKS. commit f1a28b96c900c658fe016852ff62f6c24d1f50fa Author: Lasse Collin Date: Sun Nov 22 12:05:33 2009 +0200 Add missing consts to pointer casts. commit b9b5c54cd438b3ae47b44cc211b71f3bc53e35ef Author: Lasse Collin Date: Sun Nov 22 12:00:30 2009 +0200 Enable assembler code only if it is known to work on that operating system. I'm too lazy to think how to make a good Autoconf test for this and it's not that important anyway. No longer define HAVE_ASM_X86 or HAVE_ASM_X86_64. Inline assembler (if any) is used if a macro like __i386__ or __x86_64__ is defined. commit 0733f4c9994db696420a405810d5f02c79ebc404 Author: Lasse Collin Date: Sun Nov 22 11:55:03 2009 +0200 Make fastpos.h use tuklib_integer.h instead of bsr.h when --enable-small has been specified. 
commit 7ac3985d891dcc5773543f84cc5bce6c14841b12 Author: Lasse Collin Date: Sun Nov 22 11:52:30 2009 +0200 Update tuklib_integer.h with bit scan functions. Thanks to Joachim Henke for the original patch. commit c74c132f7f79a842c073c66575a4fdb985e4c2e3 Author: Lasse Collin Date: Fri Nov 20 12:51:19 2009 +0200 Update tuklib_cpucores.m4 and tuklib_physmem.m4 from tuklib, which now use AC_CACHE_CHECK. Using the cache variable, configure now warns if there is no method to detect the amount of RAM and recommends using --enable-assume-ram. commit d315ca4930ff96e1428c6021c96f209e1abdd83e Author: Lasse Collin Date: Mon Nov 16 18:16:45 2009 +0200 Add support for --info-memory and --robot to xz. Currently --robot works only with --info-memory and --version. --help and --long-help work too, but --robot has no effect on them. Thanks to Jonathan Nieder for the original patches. commit e330fb7e6b8162894280c8a3dc22fdc05cd2d85e Author: Lasse Collin Date: Sun Nov 15 12:54:45 2009 +0200 Fix wrong indentation caused by incorrect settings in the text editor. commit 93e418562cf127a9171e87bcd4e9af8e1bfcdae4 Author: Lasse Collin Date: Sun Nov 15 12:40:17 2009 +0200 Add lzma_physmem(). I had hoped to keep liblzma as purely a compression library as possible (e.g. file I/O will go into a different library), but it seems that applications linking against liblzma need some way to determine the memory usage limit, and knowing the amount of RAM is one reasonable way to help making such decisions. Thanks to Jonathan Nieder for the original patch. commit cf39faca59083d38422058c6c97aa757ea7797d0 Author: Lasse Collin Date: Sat Nov 14 20:21:19 2009 +0200 Updated THANKS. commit 2ddcae247c284cc2f396b6cfdab57790c7588b5f Author: Lasse Collin Date: Sat Nov 14 20:20:03 2009 +0200 Some updates to xz man page. commit 19b2674f07f8b588dfaf6638396b4b42866d7e23 Author: Lasse Collin Date: Sat Nov 14 19:51:03 2009 +0200 Fix description of --memory in --long-help.
commit 2291346f0cccf88e605d84b75c9c5aaaaddb5df8 Author: Lasse Collin Date: Sat Nov 14 19:45:39 2009 +0200 Update the debug programs so that they compile again. commit 418d64a32e8144210f98a810738fed5a897e8367 Author: Lasse Collin Date: Sat Nov 14 18:59:19 2009 +0200 Fix a design error in liblzma API. Originally the idea was that using LZMA_FULL_FLUSH with Stream encoder would read the filter chain from the same array that was used to initialize the Stream encoder. Since most apps wouldn't use LZMA_FULL_FLUSH, most apps wouldn't need to keep the filter chain available after initializing the Stream encoder. However, due to my mistake, it actually required keeping the array always available. Since setting the new filter chain via the array used at initialization time is not a nice way to do it for a couple of reasons, this commit ditches it and introduces lzma_filters_update(). This new function replaces also the "persistent" flag used by LZMA2 (and to-be-designed Subblock filter), which was also an ugly thing to do. Thanks to Alexey Tourbin for reminding me about the problem that Stream encoder used to require keeping the filter chain allocated. commit f0bf7634b77263a4dd02b20c71861ab67995da68 Author: Lasse Collin Date: Sat Oct 17 11:11:58 2009 +0300 Fix wrong function name in the previous commit. It was meant to be lzma_filters_copy(), not lzma_filters_dup(). commit 6d118a0b9def82e96afba7386ec8d7da0b59649f Author: Lasse Collin Date: Sat Oct 17 01:47:07 2009 +0300 Add lzma_filters_copy(). This will be needed internally by liblzma once I fix a design mistake in the encoder API. This function may be useful to applications too so it's good to export it. commit 78e92c18470483e161388e679c1ee556adb3a691 Author: Jonathan Nieder Date: Thu Oct 15 20:44:13 2009 -0500 Escape dashes in xzmore.1 A minus sign is larger, easier to see in a printout, and more likely to use the same glyph as ASCII hyphen-minus in a terminal than a hyphen.
Since broken manual pagers do not find hyphens when the user searches for a hyphen-minus, minus signs are also easier to search for. So use minus signs instead of hyphens to render sample terminal output. commit 7b7fe902d98da28e5769e2aa1e0c08c92384f7ee Author: Lasse Collin Date: Fri Oct 16 20:35:39 2009 +0300 Mention --check=none in --long-help. It was already in the man page though. Thanks to Jim Meyering for noticing this. commit ebfb2c5e1f344e5c6e549b9dedaa49b0749a4a24 Author: Lasse Collin Date: Sun Oct 4 22:57:12 2009 +0300 Use a tuklib module for integer handling. This replaces bswap.h and integer.h. The tuklib module uses <byteswap.h> on GNU, <sys/endian.h> on *BSDs and <sys/byteorder.h> on Solaris, which may contain optimized code like inline assembly. commit 29fd321033276261b87da7be5223db33d879a4c7 Author: Lasse Collin Date: Fri Oct 2 14:35:56 2009 +0300 Add support for --enable-assume-ram=SIZE. commit 3782b3fee4812b0dd4ffdfa6563ed49f73060f25 Author: Lasse Collin Date: Fri Oct 2 11:28:17 2009 +0300 Use unaligned access (if possible) on both endiannesses in lz_encoder_hash.h. commit c5f68b5cc79085a87f950fea53843e27f328068e Author: Lasse Collin Date: Fri Oct 2 11:03:26 2009 +0300 Make liblzma produce the same output on both endiannesses. Seems that it is a problem in some cases if the same version of XZ Utils produces different output on different endiannesses, so this commit fixes that problem. The output will still vary between different XZ Utils versions, but I cannot avoid that for now. This commit bloatens the code on big endian systems by 1 KiB, which should be OK since liblzma is bloated already. ;-) commit 4a84d1adfda35e4fb4d41ecf0feb8223b100517a Author: Mike Frysinger Date: Sat Sep 26 12:51:50 2009 -0400 add lzmainfo to gitignore Signed-off-by: Mike Frysinger commit 188a1dcd0cc7867810ed3a55c598d0680922c63b Author: Lasse Collin Date: Sun Sep 27 11:53:36 2009 +0300 Updated THANKS.
commit db9119b9181b307e7ac5d2bae82444d04b902b59 Author: Lasse Collin Date: Sun Sep 27 11:48:54 2009 +0300 Work around a bug in Interix header files. Thanks to Markus Duft for the patch. commit b3d105e69786a45963176fd2193abe75e05ba738 Author: Lasse Collin Date: Thu Sep 24 17:50:17 2009 +0300 Fix an error in OpenVMS-specific code. Thanks to Jouk Jansen. commit 5e000ff00d4d01e559397b49eb648ad3f159d496 Author: Lasse Collin Date: Tue Sep 22 18:59:56 2009 +0300 Added OpenVMS-specific information to INSTALL. commit 932b2e204463d70f3eee5b8a1ea5a23bf9d001a4 Author: Lasse Collin Date: Tue Sep 22 14:03:02 2009 +0300 Better fixes for OpenVMS support. Thanks to Jouk Jansen. commit 4c3630ec4179fe9265407a35c4db1374ffc82372 Author: Lasse Collin Date: Tue Sep 22 13:40:19 2009 +0300 Avoid non-standard preprocessor construct. Thanks to Jouk Jansen. commit 0deb1bb60addd1306b525e0ac0ad2a84eb0390d9 Author: Lasse Collin Date: Mon Sep 21 19:50:09 2009 +0300 Make sure that TUKLIB_DOSLIKE doesn't get defined on Cygwin. Thanks to Charles Wilson. commit e599bba4216c0edb8cc8f40adad3a6dba88685f4 Author: Lasse Collin Date: Sat Sep 19 09:47:30 2009 +0300 Various changes. Separate a few reusable components from XZ Utils specific code. The reusable code is now in "tuklib" modules. A few more could be separated still, e.g. bswap.h. Fix some bugs in lzmainfo. Fix physmem and cpucores code on OS/2. Thanks to Elbert Pol for help. Add OpenVMS support into physmem. Add a few #ifdefs to ease building XZ Utils on OpenVMS. Thanks to Jouk Jansen for the original patch. commit 49cfc8d392cf535f8dd10233225b1fc726fec9ef Author: Lasse Collin Date: Tue Sep 15 21:07:23 2009 +0300 Fix incorrect use of "restrict". commit 15ffd675ab7af84592eb1c23b0e9f4699aa0fd8c Author: Lasse Collin Date: Sat Sep 12 14:09:17 2009 +0300 Fix GCC version check for nothrow attribute. commit 6bfdd3a88a819f04c8f202e7d3c6f88a01c7d224 Author: Lasse Collin Date: Sat Sep 12 14:08:15 2009 +0300 Updated THANKS. 
commit 4ab7b16b9573bdfa32279e4adadff684d5cd58ac Author: Lasse Collin Date: Sat Sep 12 14:07:36 2009 +0300 A few grammar fixes. Thanks to Christian Weisgerber for pointing out some of these. commit 8905a33daadcd2d6557c83c81c490b827d566c94 Author: Lasse Collin Date: Fri Sep 11 17:08:15 2009 +0300 Updated THANKS. commit 68059334ff435300ab1ce2c616b0eee1b0d88dd9 Author: Lasse Collin Date: Fri Sep 11 17:06:32 2009 +0300 Add PACKAGE_HOMEPAGE to {windows,dos}/config.h to fix build errors. commit 221be761f467da76875247bc02d7a1716682075d Author: Lasse Collin Date: Fri Sep 11 10:24:09 2009 +0300 Use $(LN_EXEEXT) in symlinks to executables. This fixes "make install" on operating systems using a suffix for executables. Cygwin is treated specially. The symlink names won't have .exe suffix even though the executables themselves have. Thanks to Charles Wilson. commit 18a4233a53d9b82abac7db7d7804684c5fea9c2c Author: Lasse Collin Date: Fri Sep 11 09:25:09 2009 +0300 Fix a couple of warnings. commit 429910b2ba67611d8df60d1a9da9641bdb5f82b4 Author: Lasse Collin Date: Sat Sep 5 18:39:21 2009 +0300 Add OS/2-specific code to physmem.h. Also move DJGPP-specific code near the code meant for other DOS-like systems. commit 7aca7b3174bcbba4a4915682ff0cd405d63f5740 Author: Lasse Collin Date: Sat Sep 5 01:21:15 2009 +0300 Updated THANKS. commit 60ccb80c9c4a0d771acc5b7d9d6f32b17fed1071 Author: Lasse Collin Date: Sat Sep 5 01:20:29 2009 +0300 Use sysctl() != -1 instead of !sysctl() to check if the function call succeeded. NetBSD 4.0 returns positive values on success, but NetBSD Current and FreeBSD return zero. OpenBSD's man page doesn't tell what sysctl() returns on success. All these BSDs return -1 on error. Thanks to Robert Elz and Thomas Klausner. commit 173368911cf09ab0b03fc4db8f3d4b81d86dce32 Author: Lasse Collin Date: Wed Sep 2 09:43:51 2009 +0300 Mention in INSTALL that --enable-small doesn't modify CFLAGS. 
commit 319a0fd7d7e9ebbb71ca6930abfc20777cb4aacc Author: Lasse Collin Date: Tue Sep 1 20:40:01 2009 +0300 Refactored option parsing. commit 25adaaa56e2e51a47a910a8d73452414619a2e53 Author: Lasse Collin Date: Tue Sep 1 20:23:30 2009 +0300 Fix options parsing bug in xz. xz used to reject "xz --lzma2=pb=2," while "xz --lzma2=pb=2,," worked. Now both work. commit 5f6dddc6c911df02ba660564e78e6de80947c947 Author: Lasse Collin Date: Tue Sep 1 20:20:19 2009 +0300 Updated TODO. commit 655457b9ada5ec7db398c5392e41290f3f332ea8 Author: Lasse Collin Date: Mon Aug 31 21:59:25 2009 +0300 Revert 43f44160b1ddcbf7e5205c37db09b3bebe7226f9 and use a fix that works on all systems using GNU assembler. Maybe the assembler code is used e.g. on Solaris x86 but let's worry about it if this doesn't work on it. commit 162189c3477953805a28f96d3a75cb9ab9417928 Author: Lasse Collin Date: Sun Aug 30 17:29:19 2009 +0300 Updated THANKS. commit 2331f5f97af3e5897e23da45d9df3d664099c7f8 Author: Lasse Collin Date: Sun Aug 30 17:28:52 2009 +0300 Add more OS/2 specific info to INSTALL. commit 94c66b3297b3ad307eee93cf6b160e3c43997f11 Author: Lasse Collin Date: Sat Aug 29 14:43:52 2009 +0300 Use even more hackish way to support thousand separators. Seems that in addition to Windows and DOS, also OpenBSD lacks support for %'d style printf() format strings. So far that is the only modern POSIX-like system I know with this problem, but after this hack, the thousand separator shouldn't be a problem on any system. Maybe testing if a format string like %'d produces reasonable output is invoking undefined behavior on some systems, but so far all the problematic systems I've tried just print the raw format string (e.g. %'d prints 'd). Maybe Autoconf test would have been better, but this hack works also for cross-compilation, and avoids recompilation in case the system libc starts to support the thousand separator.
commit 3432e9c6aab851da1227b63dce645d7f190c04d8 Author: Lasse Collin Date: Sat Aug 29 13:42:56 2009 +0300 Updated THANKS. commit 27414daadf5727e8ab942374b5ec1c8990122878 Author: Lasse Collin Date: Sat Aug 29 13:39:21 2009 +0300 Fix sysctl() usage. This fixes build on *BSDs and Darwin. Thanks to Jukka Salmi for the patches. Richard Koch reported the problem too. commit 43f44160b1ddcbf7e5205c37db09b3bebe7226f9 Author: Lasse Collin Date: Sat Aug 29 13:35:23 2009 +0300 Fix x86 assembler on GCC 3. Thanks to Karl Berry. commit 682efdc1f9492fdd76c9ce82e7c00ca0768067e8 Author: Lasse Collin Date: Thu Aug 27 18:36:59 2009 +0300 "make dist" fixes commit c8c184db1c95bf70f78256ec6237845a57f342af Author: Lasse Collin Date: Thu Aug 27 17:08:33 2009 +0300 Update xz man page date. commit 9756fce565e98b8fa5fe6ead296d84e7601ec254 Author: Lasse Collin Date: Thu Aug 27 17:00:22 2009 +0300 Fix the debug directory. 6a2eb54092fc625d59921a607ff68cd1a90aa898 and 71f18e8a066a01dda0c8e5508b135ef104e43e4c required some changes that weren't applied in debug. commit 77007a7fb20187fcf3d1dd9839c79ace2d63f2ea Author: Lasse Collin Date: Thu Aug 27 16:36:40 2009 +0300 Add missing files to EXTRA_DIST. commit 04dcbfdeb921e5f361a4487134e91e23fffbe09d Author: Lasse Collin Date: Thu Aug 27 16:21:22 2009 +0300 Bumped version to 4.999.9beta. commit fd7618611a22f42a6913bc8d518c9bbc9252d6b4 Author: Lasse Collin Date: Thu Aug 27 16:17:47 2009 +0300 Updated THANKS. commit c29e76c0f910fca0a90a50b78d337f6c32623e9d Author: Lasse Collin Date: Thu Aug 27 16:12:52 2009 +0300 .xz file format specification 1.0.4 (probably). Thanks to Christian von Roques, Peter Lawler, and Jim Meyering for the fixes. commit 696d7ee3953beaf4f0ed18e78917ccf300431966 Author: Lasse Collin Date: Thu Aug 27 15:43:54 2009 +0300 Require GNU libtool 2.2. commit 4c3558aa8305a8f8b6c43b8569eb539717ca9e8d Author: Lasse Collin Date: Thu Aug 27 15:34:45 2009 +0300 Add "dos" to EXTRA_DIST. 
commit 35b29e4424ced5a3ababf132283e519080c7b298 Author: Lasse Collin Date: Thu Aug 27 15:23:27 2009 +0300 Updated TODO. commit 23414377192c21f3f34c84cdfe0ef0fbd06a1dea Author: Lasse Collin Date: Thu Aug 27 15:17:00 2009 +0300 Some xz man page improvements. commit 371b04e19fc9051dbaeec51ec0badec6a1f0699d Author: Lasse Collin Date: Thu Aug 27 10:41:01 2009 +0300 Removed doc/bugs.txt. commit d88c4072b36d3a76f839185799fb1d91037a1b81 Author: Lasse Collin Date: Thu Aug 27 10:40:25 2009 +0300 Updated README. It now includes bug reporting instructions/tips. commit 92e536d8b8d33a6b12d0802bcd7be4437046f13e Author: Lasse Collin Date: Thu Aug 27 10:21:18 2009 +0300 Fix a typo in FAQ. Thanks to Jim Meyering. (From now on, I try to always remember to put the relevant thanks to commit messages.) commit 3e2ba8b58585743e59251e69ad2783eb08357079 Author: Lasse Collin Date: Thu Aug 27 10:13:46 2009 +0300 Updates to liblzma API headers. Added lzma_nothrow for every function. It adds throw() when the header is used in C++ code. Some lzma_attrs were added or removed. Lots of comments were improved. commit 8e8ebc17c535a1f8846718059b48417409c37050 Author: Lasse Collin Date: Tue Aug 18 00:30:09 2009 +0300 Install faq.txt. commit b198e770a146e4a41f91a93f0b233713f2515848 Author: Lasse Collin Date: Tue Aug 18 00:26:48 2009 +0300 Updated faq.txt. Some questions worth answering were removed, because I currently don't have good up to date answers to them. commit fe111a25cd788d31b581996e4533910388a7f0a9 Author: Lasse Collin Date: Mon Aug 17 22:45:50 2009 +0300 Some xz man changes. commit 10242a21e9abda0c5c6a03501703cc40b8a699a5 Author: Lasse Collin Date: Sun Aug 16 22:15:42 2009 +0300 Updated THANKS. commit 3ce1916c83041113b9cad9ead5c97a527cf8aa1d Author: Lasse Collin Date: Sun Aug 16 22:15:13 2009 +0300 Fix data corruption in LZ/LZMA2 encoder. Thanks to Jonathan Stott for the bug report. 
commit 66da129c8ec33dd66acc92f113f7c1ca740ca81a Author: Lasse Collin Date: Thu Aug 13 15:15:37 2009 +0300 Updated INSTALL and PACKAGERS to match the changes made in --enable-dynamic. commit 8238c4b2402f952c4e492e5b778aa272e57b6705 Author: Lasse Collin Date: Thu Aug 13 15:03:46 2009 +0300 Link lzmainfo against shared liblzma by default. commit 71f18e8a066a01dda0c8e5508b135ef104e43e4c Author: Lasse Collin Date: Thu Aug 13 15:00:21 2009 +0300 Make --enable-dynamic a tristate option. Some programs will by default be linked against static liblzma and some against shared liblzma. --enable-dynamic now allows overriding the default to both directions (all dynamic or all static) even when building both shared and static liblzma. This is quite messy compared to how simple thing it is supposed to be. The complexity is mostly due to Windows support. commit 5aa4678b2342dcfc1d2b31aa9fa4f39c539e4b61 Author: Lasse Collin Date: Thu Aug 13 12:56:47 2009 +0300 Fix xz Makefile.am for the man page. install-exec-hook -> install-data-hook commit e51b4e49e800bd84e6d589dca2964d3985e88139 Author: Lasse Collin Date: Thu Aug 13 12:55:45 2009 +0300 Add lzmainfo for backward compatibility with LZMA Utils. lzmainfo now links against static liblzma. In contrast to other command line tools in XZ Utils, linking lzmainfo against static liblzma by default is dumb. This will be fixed once I have fixed some related issues in configure.ac. commit a4165d0584376d948c213ec93c6065d24ff6a5e7 Author: Lasse Collin Date: Thu Aug 13 12:42:36 2009 +0300 Sync some error messages from xz to xzdec. Make xz error message translation usable outside xz (at least in upcoming lzmainfo). commit df636eb4e066b4e154ce8e66e82c87ba1db652a6 Author: Lasse Collin Date: Thu Aug 13 09:37:21 2009 +0300 Add xz man page to manfiles in toplevel Makefile.am. commit 180bdf58ea5bb07941e0a99b304d9aa832198748 Author: Lasse Collin Date: Thu Aug 13 09:37:01 2009 +0300 Fix first line of xz man page. 
commit e1ce2291e759b50ebfcf7cbbcc04cd098f1705a4 Author: Lasse Collin Date: Mon Aug 10 11:22:31 2009 +0300 Added a rough version of the xz man page. commit e71903fc6101f1c039d702e335b08aad1e1b4100 Author: Jonathan Nieder Date: Sun Aug 9 13:41:20 2009 -0500 “xzdiff a.xz b.xz” always fails Attempts to compare two compressed files result in no output and exit status 2. Instead of going to standard output, ‘diff’ output is being captured in the xz_status variable along with the exit status from the decompression commands. Later, when this variable is examined for nonzero status codes, numerals from dates in the ‘diff’ output make it appear as though decompression failed. So let the ‘diff’ output leak to standard output with another file descriptor. (This trick is used in all similar contexts elsewhere in xzdiff and in the analogous context in gzip’s zdiff script.) commit 1d314b81aa5b0c4530638ffabd4e0edb52e5362c Author: Jonathan Nieder Date: Sun Aug 9 13:22:12 2009 -0500 xzless: Support compressed standard input It can be somewhat confusing that less < some_file.txt works fine, whereas xzless < some_file.txt.xz does not. Since version 429, ‘less’ allows a filter specified in the LESSOPEN environment variable to preprocess its input even if it comes from standard input, if $LESSOPEN begins with ‘|-’. So set $LESSOPEN to take advantage of this feature. Check less’s version at runtime so xzless can continue to work with older versions. commit a7f5d2fe4826ac68839d00059f05004fb81d5c69 Author: Lasse Collin Date: Sun Aug 9 20:57:46 2009 +0300 GPLv2+ not GPLv2 for Doxyfile.in is probably OK. commit b735cde20cc14857136ae65a0e5d336ed7ddc862 Author: Lasse Collin Date: Sun Aug 2 00:27:29 2009 +0300 Added a copyright notice to Doxyfile.in since it contains lots of comments from Doxygen. It seems that the Doxygen authors' intent is to not apply their copyright on generated files, but since it doesn't matter for XZ Utils at all, better safe than sorry. 
commit 0fd157cc008446adfc8f91394f5503868025a642 Author: Lasse Collin Date: Sun Aug 2 00:11:37 2009 +0300 Updated THANKS. commit b198da96ff9ac8c89b466b4d196c5f3fe1c7904f Author: Lasse Collin Date: Sun Aug 2 00:10:22 2009 +0300 Updated TODO. commit 669413bb2db954bbfde3c4542fddbbab53891eb4 Author: Lasse Collin Date: Thu Jul 30 12:25:55 2009 +0300 Updated THANKS. commit dbbd8fb870ae789d96497911006c869d37148c15 Author: Jonathan Nieder Date: Tue Jul 28 17:37:24 2009 -0500 xzdiff: add missing ;; to case statement commit adbad2d16cb5909f85d4a429011005613ea62ffe Author: Lasse Collin Date: Fri Jul 24 13:15:06 2009 +0300 Added history.txt to doc_DATA. commit e0236f12569eb36f9b81ce7a1e52e0f73698ac27 Author: Lasse Collin Date: Fri Jul 24 12:00:40 2009 +0300 Updated .gitignore files. commit 2f34fb269265e3aba43a2a9c734020a45268826d Author: Lasse Collin Date: Fri Jul 24 11:34:02 2009 +0300 Minor improvements to COPYING. commit 0db1befcfbc120377df4b89923762f16d25f548a Author: Lasse Collin Date: Thu Jul 23 19:10:55 2009 +0300 Fix incorrect usage of getopt_long(), which caused invalid memory access if XZ_OPT was defined. commit 8f8ec942d6d21ada2096eaf063411bc8bc7e2d48 Author: Lasse Collin Date: Mon Jul 20 15:43:32 2009 +0300 Avoid internal error with --format=xz --lzma1. commit 99f9e879a6a8bb54a65da99c12e0f390216c152a Author: Lasse Collin Date: Sun Jul 19 13:14:20 2009 +0300 Major documentation update. Installation and packaging instructions were added. README and other generic docs were revised. Some of the documentation files are now installed to $docdir. commit ef4cf1851de89022cba5674784f1a8f6343c15b0 Author: Lasse Collin Date: Sun Jul 19 11:09:31 2009 +0300 Added missing author notice to xzless.in. commit 4c9c989d45b188667799a7a1d6c728ed43f7bf77 Author: Lasse Collin Date: Sat Jul 18 18:54:55 2009 +0300 Use AC_CONFIG_AUX_DIR to clean up the toplevel directory a little. Fixed a related bug in the toplevel Makefile.am. Added the build-aux directory to .gitignore. 
commit 366e436090a7a87215e9bf0e3ddcd55f05b50587 Author: Lasse Collin Date: Sat Jul 18 14:34:08 2009 +0300 Updated the totally outdated TODO file. commit 64e498c89d8b9966e8663f43bf64d47c26c55c62 Author: Lasse Collin Date: Sat Jul 18 11:26:39 2009 +0300 Added public domain notice into a few files. commit a35755c5de808df027675688855d1b621a4fb428 Author: Lasse Collin Date: Tue Jul 14 21:10:36 2009 +0300 Allow extra commas in filter-specific options on xz command line. This may slightly ease writing scripts that construct filter-specific option strings dynamically. commit 98f3cac1ad31191c5160a7e48398bf85141e941c Author: Lasse Collin Date: Tue Jul 14 18:04:31 2009 +0300 Accept --lzma2=preset=6e where "e" is equivalent to --extreme when no custom chain is in use. commit d873a09e956363e54bf58c577c8f7e487b6fb464 Author: Lasse Collin Date: Sun Jul 12 19:08:30 2009 +0300 Add dist-hook to create ChangeLog from the commit log, and to convert the man pages to PDF and plain text, which may be convenient to those who cannot render man pages. commit cd69a5a6c16c289f6f8e2823b03c72289472270f Author: Lasse Collin Date: Fri Jul 10 11:39:38 2009 +0300 BCJ filters: Reject invalid start offsets with LZMA_OPTIONS_ERROR. This is a quick and slightly dirty fix to make the code conform to the latest file format specification. Without this patch, it's possible to make corrupt files by specifying start offset that is not a multiple of the filter's alignment. Custom start offset is almost never used, so this was only a minor bug. The xz command line tool doesn't validate the start offset, so one will get a bit unclear error message if trying to use an invalid start offset. commit eed9953732b801f6c97317fb3160445a8754180b Author: Lasse Collin Date: Fri Jul 10 11:33:21 2009 +0300 Look for full command names instead of substrings like "un", "cat", and "lz" when determining if xz is run as unxz, xzcat, lzma, unlzma, or lzcat. This is to ensure that if xz is renamed (e.g.
via --program-transform-name), it doesn't so easily work in wrong mode. commit 6f62fa88f4ff7ba78565c314c0e6e71c498fa658 Author: Lasse Collin Date: Wed Jul 8 23:06:46 2009 +0300 Updated THANKS. commit 1754b7e03e2aa7e2e0196807fe8b0f3f5a637b0e Author: Lasse Collin Date: Wed Jul 8 23:05:29 2009 +0300 Portability improvement to version.sh. commit 3bdb53792c0e3e3febe9370e56eda5b08f89410f Author: Lasse Collin Date: Wed Jul 8 22:50:16 2009 +0300 Remove --force from xzdec. It was ignored for compatibility with xz, but now that --decompress --stdout --force copies unrecognized files as is to stdout, simply ignoring --force in xzdec would be wrong. xzdec will not support copying unrecognized data as is to stdout, so it cannot support --force. commit 5f16ef4abf220028a9ddbcb138217597a9455f62 Author: Lasse Collin Date: Mon Jul 6 10:36:04 2009 +0300 Use sed instead of $(SED) so that we don't need to use AC_PROG_SED. We don't do anything fancy with sed, so this should work OK. libtool 2.2 sets SED but 1.5 doesn't, so $(SED) happened to work when using libtool 2.2. commit 96e4b257e101d72072d43e144897d92920270669 Author: Lasse Collin Date: Sun Jul 5 22:25:17 2009 +0300 Major update to the xzgrep and other scripts based on the latest versions found from gzip CVS repository. configure will try to find a POSIX shell to be used by the scripts. This should ease portability on systems which have pre-POSIX /bin/sh. xzgrep and xzdiff support .xz, .lzma, .gz, and .bz2 files. xzmore and xzless support only .xz and .lzma files. The name of the xz executable used in these scripts is now correct even if --program-transform-name has been used. commit 25cc7a6e8c2506a0d80084a4c1c67d33e7439100 Author: Lasse Collin Date: Sun Jul 5 19:26:53 2009 +0300 Use @PACKAGE_HOMEPAGE@ in liblzma.pc.in. commit 18c10c30d2833f394cd7bce0e6a821044b15832f Author: Lasse Collin Date: Sat Jul 4 00:40:44 2009 +0300 Make "xz --decompress --stdout --force" copy unrecognized files as is to standard output. 
This feature is needed to be more compatible with gzip's behavior. This was more complicated to implement than it sounds, because of the way liblzma is able to return errors with files of only a few bytes in size. xz now has its own file type detection code and no longer uses lzma_auto_decoder(). commit 0a289c01ac821ea9c4250aa906b0ae3cfa953633 Author: Lasse Collin Date: Thu Jul 2 14:30:38 2009 +0300 Define PACKAGE_HOMEPAGE in configure.ac and use it in xz and xzdec. Use also PACKAGE_NAME instead of hardcoding "XZ Utils". commit 5cc99db5bae8633f85559e5cdaef4cd905a4ee9c Author: Lasse Collin Date: Wed Jul 1 12:21:24 2009 +0300 Avoid visibility related compiler warnings on Windows. commit 7653d1cf48080e63b189ed9d58dea0e82b6b1c5e Author: Lasse Collin Date: Tue Jun 30 17:14:39 2009 +0300 Use static liblzma by default also for tests. commit f42ee981668b545ab6d06c6072e262c29605273c Author: Lasse Collin Date: Tue Jun 30 17:09:57 2009 +0300 Build system fixes Don't use libtool convenience libraries to avoid recently discovered long-standing subtle but somewhat severe bugs in libtool (at least 1.5.22 and 2.2.6 are affected). It was found when porting XZ Utils to Windows but the problem is significant also e.g. on GNU/Linux. Unless --disable-shared is passed to configure, static library built from a set of convenience libraries will contain PIC objects. That is, while libtool builds non-PIC objects too, only PIC objects will be used from the convenience libraries. On 32-bit x86 (tested on mobile XP2400+), using PIC instead of non-PIC makes the decompressor 10 % slower with the default CFLAGS. So while xz was linked against static liblzma by default, it got the slower PIC objects unless --disable-shared was used. I tend to develop and benchmark with --disable-shared due to faster build time, so I hadn't noticed the problem in benchmarks earlier. This commit also adds support for building Windows resources into liblzma and executables.
commit 89dac1db6f168d7469cfbc4432651d4724c5c0de Author: Lasse Collin Date: Mon Jun 29 22:19:51 2009 +0300 Added a comment about "autoconf -fi" to autogen.sh. commit 6e685aae4594bc0af1b5032e01bb37d0edaa3ebd Author: Lasse Collin Date: Sun Jun 28 10:04:24 2009 +0300 Add -no-undefined to get shared liblzma on Windows. commit 73f560ee5fa064992b76688d9472baf139432540 Author: Lasse Collin Date: Sat Jun 27 22:57:15 2009 +0300 Make physmem() work on Cygwin 1.5 and older. commit 7ff0004fbce24ae72eddfe392828ffd7d4639ed1 Author: Lasse Collin Date: Sat Jun 27 17:28:01 2009 +0300 Moved the Windows resource files outside the windows directory to prepare for building them with Autotools. commit 449c634674f35336a4815d398172e447659a135e Author: Lasse Collin Date: Sat Jun 27 13:05:03 2009 +0300 Added missing $(EXEEXT). commit 792db79f27ad9ab1fb977e23be65c7761f545752 Author: Lasse Collin Date: Sat Jun 27 12:32:40 2009 +0300 Create correct symlinks even when --program-{prefix,suffix,transform} is passed to configure. commit 0adc72feb84f5b903f6ad9d3f759b1c326fafc6b Author: Lasse Collin Date: Sat Jun 27 10:02:24 2009 +0300 Silence a compiler warning on DOS-like systems. commit ad12edc95254ede3f0cb8dec8645e8789e984c4f Author: Lasse Collin Date: Sat Jun 27 09:35:15 2009 +0300 Updated the filenames in POTFILES.in too. commit b2b1f867532732fe9969131f8713bdd6b0731763 Author: Lasse Collin Date: Sat Jun 27 00:43:06 2009 +0300 Hopefully improved portability of the assembler code in Autotools based builds on Windows. commit c393055947247627a09b6a6b8f20aa0c32f9be16 Author: Lasse Collin Date: Fri Jun 26 21:17:29 2009 +0300 Updated THANKS (most of today's commits are based on Charles Wilson's patches). commit da0af22e4b4139b8a10710945f8b245b3a77c97d Author: Lasse Collin Date: Fri Jun 26 21:00:35 2009 +0300 Updated comments to match renamed files. 
commit 65014fd211dfbd4be48685998cb5a12aaa29c8d2 Author: Lasse Collin Date: Fri Jun 26 20:49:54 2009 +0300 Rename process.[hc] to coder.[hc] and io.[hc] to file_io.[hc] to avoid problems on systems with system headers with those names. commit 5e1257466dcb66f1d7a3f71814a5ad885cba43e8 Author: Lasse Collin Date: Fri Jun 26 20:43:36 2009 +0300 Rename process_file() to coder_run(). commit cad62551c5fa9865dbe0841a0b3bc729c4fbe8fc Author: Lasse Collin Date: Fri Jun 26 20:36:45 2009 +0300 Ugly hack to make it possible to use the thousand separator format character with snprintf() on POSIX systems but not on non-POSIX systems and still keep xgettext working. commit fe378d47074b16c52b00fe184d119287c68ce2e7 Author: Lasse Collin Date: Fri Jun 26 15:40:40 2009 +0300 Added missing source files to windows/Makefile. commit 390a6408563067613b29de895cb40e4d0386d62c Author: Lasse Collin Date: Fri Jun 26 15:37:53 2009 +0300 Basic support for building with Cygwin and MinGW using the Autotools based build system. It's not good yet, more fixes will follow. commit 1c9360b7d1197457aaad2f8888b99f1149861579 Author: Lasse Collin Date: Fri Jun 26 14:47:31 2009 +0300 Fix @variables@ to $(variables) in Makefile.am files. Fix the ordering of libgnu.a and LTLIBINTL on the linker command line and added missing LTLIBINTL to tests/Makefile.am. commit d45615c555e250209ebb55aa3649abe790f1eeac Author: Lasse Collin Date: Fri Jun 26 14:20:02 2009 +0300 Allow to explicitly specify autotool versions in autogen.sh. commit eaf8367368a329afa48785380f9dca6b681f3397 Author: Lasse Collin Date: Fri Jun 26 14:18:32 2009 +0300 Add version.sh to EXTRA_DIST. commit b317b218e2d383dd27a700094c0de4510540ea18 Author: Lasse Collin Date: Wed Jun 24 20:14:10 2009 +0300 Support HW_PHYSMEM64 commit ae82dde5d9cc60c80cc89601b6c51cc1611d48e7 Author: Lasse Collin Date: Wed Jun 24 13:01:59 2009 +0300 Cast a char argument to isspace() to unsigned char. 
commit 1735d31ea347210e914df038eeea4b2626e76e42 Author: Lasse Collin Date: Fri Jun 5 13:46:26 2009 +0300 A few more spelling fixes. Released the .xz spec 1.0.3. commit 8ed156ce894966103e895aa08f2a9fb912f6fad5 Author: Lasse Collin Date: Thu Jun 4 23:42:12 2009 +0300 Added xzdec man page. commit f6df39afaa84f71439507178a49b2a5dda6e824c Author: Lasse Collin Date: Thu Jun 4 23:26:47 2009 +0300 Harmonized xzdec --memory with xz --memory and made minor cleanups. commit 1774f27c61ce294a56712ca2f4785f90a62441bc Author: Lasse Collin Date: Thu Jun 4 22:59:55 2009 +0300 Fix purporse -> purpose. Thanks to Andrew Dudman. Released .xz spec 1.0.2 due to this fix too. commit cb613455642f48fb51059e22018615f64c59b70f Author: Lasse Collin Date: Mon Jun 1 14:53:57 2009 +0300 The .xz file format version 1.0.1 commit 083c23c680ff844846d177cfc58bb7a874e7e6b9 Author: Lasse Collin Date: Tue May 26 14:48:48 2009 +0300 Make the raw value of the Check field available to applications via lzma_block structure. This changes ABI but doesn't break API. commit b4f5c814090dc07d4350453576305e41eb9c998d Author: Lasse Collin Date: Sat May 23 16:57:21 2009 +0300 Remove undocumented alternative option names --bcj, --ppc, and --itanium. commit b1edee2cdc7ef4411b1a21c07094ec763f071281 Author: Lasse Collin Date: Sat May 23 15:12:23 2009 +0300 Add support for specifying the BCJ filter start offset in the xz command line tool. commit 72aa0e9c5f4289f10ef5bf240a9448d3017f1ceb Author: Lasse Collin Date: Sat May 23 14:51:09 2009 +0300 Updated THANKS. commit dcedb6998cefeca6597dd1219328a3abf5acf66d Author: Lasse Collin Date: Fri May 22 16:40:50 2009 +0300 Added support for --quiet and --no-warn to xzdec. Cleaned up the --help message a little. commit 5f735dae80aa629853f4831d7b84ec1c614979eb Author: Lasse Collin Date: Fri May 22 15:11:52 2009 +0300 Use the 40 % of RAM memory usage limit in xzdec too. Update the memory usage info text in --help to match the text in xz --long-help.
commit b60376249e0c586910c4121fab4f791820cc1289 Author: Lasse Collin Date: Fri May 22 14:43:00 2009 +0300 Add --no-warn. commit b4f92f522d4b854c0adb7c38be7531e1a6a7b008 Author: Lasse Collin Date: Fri May 22 14:27:40 2009 +0300 Fix a comment. commit 4dd21d23f22569285ae706b58b0e5904b8db1839 Author: Lasse Collin Date: Fri May 22 14:21:20 2009 +0300 Remove the --info option, which was an alias for --list. commit 8836139b63ce774bdd62abf17ab69b290e08229e Author: Lasse Collin Date: Fri May 22 12:27:43 2009 +0300 If xz is run as lzma, unlzma, or lzcat, simply imply --format=lzma. This means that xz emulating lzma doesn't decompress .xz files, while before this commit it did. The new way is slightly simpler in code and especially in upcoming documentation. commit b0063023f8adb06ea735ec4af5c6f5b7bdb8e84d Author: Lasse Collin Date: Fri May 22 11:29:50 2009 +0300 Make the default memory usage limit 40 % of RAM for both compressing and decompressing. This should be OK now that xz automatically scales down the compression settings if they would exceed the memory usage limit (earlier, the limit for compression was increased to 90 % because low limit broke scripts that used "xz -9" on systems with low RAM). Support specifying the memory usage limit as a percentage of RAM (e.g. --memory=50%). Support --threads=0 to reset the thread limit to the default value (number of available CPU cores). Use UINT32_MAX instead of SIZE_MAX as the maximum in args.c. hardware.c was already expecting uint32_t value. Cleaned up the output of --help and --long-help. commit 071b825b23911a69dd1cd2f8cda004ef8a781fae Author: Lasse Collin Date: Thu May 21 17:22:01 2009 +0300 Support special value "max" where xz and xzdec accept an integer. Don't round the memory usage limit in xzdec --help to avoid an integer overflow and to not give wrong impression that the limit is high enough when it may not actually be.
commit 03ca67fd37dd43fa7f590de340899cd497c10802 Author: ABCD Date: Wed May 20 17:31:18 2009 -0400 Install lzdiff, lzgrep, and lzmore as symlinks This adds lzdiff, lzgrep, and lzmore to the list of symlinks to install. It also installs symlinks for the manual pages and removes the new symlinks on uninstall. commit a6f43e64128a6da5cd641de1e1e527433b3e5638 Author: Lasse Collin Date: Sat May 2 16:10:14 2009 +0300 Use a GCC-specific #pragma instead of GCC-specific -Wno-uninitialized to silence a bogus warning. commit f6ce63ebdb45a857c8949960c83c9580ae888951 Author: Lasse Collin Date: Sat May 2 14:46:50 2009 +0300 Removed --disable-encoder and --disable-decoder. Use the values given to --enable-encoders and --enable-decoders to determine if any encoder or decoder support is wanted. commit be06858d5cf8ba46557395035d821dc332f3f830 Author: Lasse Collin Date: Fri May 1 11:28:52 2009 +0300 Remove docs that are too outdated to be updated (rewrite will be better). commit 0255401e57c96af87c6b159eca28974e79430a82 Author: Lasse Collin Date: Fri May 1 11:21:46 2009 +0300 Added documentation about the legacy .lzma file format. commit 1496ff437c46f38303e0e94c511ca604b3a11f85 Author: Lasse Collin Date: Fri May 1 11:20:23 2009 +0300 Renamed the file format specification to xz-file-format.txt which is the filename used on the WWW. commit 21c6b94373d239d7e86bd480fcd558e30391712f Author: Lasse Collin Date: Tue Apr 28 23:08:32 2009 +0300 Fixed a crash in liblzma. liblzma tries to avoid useless free()/malloc() pairs in initialization when multiple files are handled using the same lzma_stream. This didn't work with filter chains due to comparison of wrong pointers in lzma_next_coder_init(), making liblzma think that no memory reallocation is needed even when it actually is. Easy way to trigger this bug is to decompress two files with a single xz command. The first file should have e.g. x86+LZMA2 as the filter chain, and the second file just LZMA2. 
commit e518d167aa5958e469982f4fb3a24b9b6a2b5d1c Author: Lasse Collin Date: Wed Apr 15 14:13:38 2009 +0300 Fix uint32_t -> size_t in ARM and ARM-Thumb filters. On 64-bit system it would have gone into infinite loop if a single input buffer was over 4 GiB (unlikely). commit 31decdce041581e57c0d8a407d4795b114ef27ca Author: Lasse Collin Date: Tue Apr 14 11:48:46 2009 +0300 Minor fixes to test files' README. commit 4787d654434891c7df5b43959b0d2873718f06e0 Author: Lasse Collin Date: Mon Apr 13 16:36:41 2009 +0300 Updated history.txt. commit 2f0bc9cd40f709152a0177c8e585c0757e9af9c9 Author: Lasse Collin Date: Mon Apr 13 14:49:48 2009 +0300 Quick & dirty update to support xz in diff/grep/more scripts. commit 02ddf09bc3079b3e17297729b9e43f14d407b8fc Author: Lasse Collin Date: Mon Apr 13 11:27:40 2009 +0300 Put the interesting parts of XZ Utils into the public domain. Some minor documentation cleanups were made at the same time. commit e79c42d854657ae7f75613bd80c1a35ff7c525cb Author: Lasse Collin Date: Fri Apr 10 11:17:02 2009 +0300 Fix off-by-one in LZ decoder. Fortunately, this bug had no security risk other than accepting some corrupt files as valid. commit 94eb9ad46f1fded6d8369cf3d38bb9754c1375af Author: Pavel Roskin Date: Tue Mar 31 12:15:01 2009 -0400 Fix minor typos in README commit 9bab5336ebd765ec4e12252f416eefdf04eba750 Author: Lasse Collin Date: Tue Mar 31 21:52:51 2009 +0300 Add a note and work-around instructions to README about problems detecting a C99 compiler when some standard headers are missing. commit a0497ff7a06f9350349264fe9b52dfefc6d53ead Author: Lasse Collin Date: Wed Mar 18 16:54:38 2009 +0200 Updated THANKS. commit 390e69887fc5e0a108eb41203bed9acd100a3d76 Author: Lasse Collin Date: Wed Mar 18 16:51:41 2009 +0200 Fix wrong macro names in lc_cpucores.m4 and cpucores.h. Thanks to Bert Wesarg. commit 0df9299e2478c2a0c62c05b1ae14a85a353e20d6 Author: Lasse Collin Date: Sun Mar 1 09:03:08 2009 +0200 Test for Linux-specific sysinfo() only on Linux systems. 
Some other systems have sysinfo() with different semantics. commit cf751edfde3ad6e088dc18e0522d31ae38405933 Author: Lasse Collin Date: Sun Mar 1 09:00:06 2009 +0200 Added AC_CONFIG_MACRO_DIR to configure.ac. commit 63df14c57dee7c461717784287056688482a7eb9 Author: Lasse Collin Date: Sun Mar 1 08:58:41 2009 +0200 Fix the Autoconf test for getopt_long replacement. It was broken by e114502b2bc371e4a45449832cb69be036360722. commit fd6a380f4eda4f00be5f2aa8d222992cd74a714f Author: Lasse Collin Date: Sun Feb 22 19:07:54 2009 +0200 Add a rough explanation of --extreme to output of --help. commit 68bf7ac2984d3627369a240ef0491934d53f7899 Author: Lasse Collin Date: Sun Feb 22 18:52:49 2009 +0200 Fixes to progress message handling in xz: - Don't use Windows-specific code on Windows. The old code required at least Windows 2000. Now it should work on Windows 98 and later, and maybe on Windows 95 too. - Use less precision when showing estimated remaining time. - Fix some small design issues. commit 47c2e21f82242f50f18713a27d644c2c94ab3fea Author: Lasse Collin Date: Wed Feb 18 13:00:10 2009 +0200 Added files missing from the previous commit. commit 489a3dbaa0465f04400804e956a1cfbbee3654a2 Author: Lasse Collin Date: Tue Feb 17 10:43:00 2009 +0200 Added lzma_easy_buffer_encode(). Split easy.c into small pieces to avoid unneeded dependencies making statically linked applications bigger than needed. commit 7494816ab08d82f4d6409788825930c4e43cfd0d Author: Lasse Collin Date: Sun Feb 15 15:48:45 2009 +0200 Make physmem.h work on old Windows versions. Thanks to Hongbo Ni for the original patch. commit 11ae4ae35fd70182c713f2d914b7cb1143bc76f0 Author: Lasse Collin Date: Sat Feb 14 20:44:52 2009 +0200 Fix microsecond vs. nanosecond confusion in my_time(). commit 3084d662d2646ab7eb58daf0dc32cf3f9a74eec7 Author: Lasse Collin Date: Sat Feb 14 00:45:29 2009 +0200 Cleanups to the code that detects the amount of RAM and the number of CPU cores.
Added support for using sysinfo() on Linux systems whose libc lacks appropriate sysconf() support (at least dietlibc). The Autoconf macros were split into separate files, and CPU core count detection was moved from hardware.c to cpucores.h. The core count isn't used for anything real for now, so a problematic part in process.c was commented out. commit 9c62371eab2706c46b1072f5935e28cb4cd9dca8 Author: Lasse Collin Date: Fri Feb 13 18:23:50 2009 +0200 Initial port to DOS using DJGPP. commit 0dae8b7751d09e9c5a482d5519daaee4800ce203 Author: Lasse Collin Date: Fri Feb 13 18:02:05 2009 +0200 Windows port: Take advantage of the version number macros. Now the version number is not duplicated in the Windows-specific files anymore. commit fdbc0cfa71f7d660855098a609175ba384259529 Author: Lasse Collin Date: Fri Feb 13 18:00:03 2009 +0200 Changed how the version number is specified in various places. Now configure.ac will get the version number directly from src/liblzma/api/lzma/version.h. The intent is to reduce the number of places where the version number is duplicated. In future, support for displaying Git commit ID may be added too. commit 1d924e584b146136989f48c13fff2632896efb3d Author: Lasse Collin Date: Fri Feb 13 17:30:30 2009 +0200 Fix handling of integrity check type in the xz command line tool. commit 96c46df7deb231ea68a03d8d1da9de4c774e36d8 Author: Lasse Collin Date: Fri Feb 13 17:29:02 2009 +0200 Improve support for DOS-like systems. Here DOS-like means DOS, Windows, and OS/2. commit b6a30ee8c2de60ecd722cd05223e4ba72f822e33 Author: Lasse Collin Date: Wed Feb 11 20:02:32 2009 +0200 Remove dead directories from .gitignore. commit 1ec5b0027911d94cb6f98892cbc690f818d8a861 Author: Jim Meyering Date: Wed Feb 11 14:45:14 2009 +0100 .gitignore vs. Makefiles How about this for those of us who do srcdir builds? 
commit 154f5aec2de201c674841de4fcc9804c2a87af07 Author: Lasse Collin Date: Tue Feb 10 21:48:35 2009 +0200 Removed Makefile from .gitignore since not all Makefiles in the repository are generated by Autotools. People should do test builds in a separate build directory anyway. commit e605c2663691b0a4c307786aa368d124ea081daa Author: Lasse Collin Date: Tue Feb 10 21:48:05 2009 +0200 Added resource files for the Windows build. commit a3bbbe05d32b1f7ea9eb98805df4dda2e811b476 Author: Lasse Collin Date: Mon Feb 9 14:54:31 2009 +0200 Let the user specify custom CFLAGS on the make command line. Previously custom CFLAGS worked only when they were passed to configure. commit 53f7598998b1860a69c51243b5d2e34623c6bf60 Author: Lasse Collin Date: Sun Feb 8 21:35:11 2009 +0200 Fix aliasing issue in physmem.h. commit 0e27028d74c5c7a8e036ae2a9b8cecb0ac79d3a6 Author: Lasse Collin Date: Sun Feb 8 18:24:50 2009 +0200 Add a separate internal function to initialize the CRC32 table, which is used also by LZ encoder. This was needed because calling lzma_crc32() and ignoring the result is a no-op due to lzma_attr_pure. commit ae1ad9af54210c9a2be336b1316532da5071516c Author: Lasse Collin Date: Sun Feb 8 18:17:05 2009 +0200 Make "xz --force" to write to terminal as the error message suggests. commit 79e25eded48d2fe33f31441ab7a034f902e335f8 Author: Lasse Collin Date: Sun Feb 8 10:37:50 2009 +0200 Support both slash and backslash as path component separator on Windows when parsing argv[0]. commit bc7c7109cc4410055a888c1c70cbd1c9445c4361 Author: Lasse Collin Date: Sat Feb 7 23:18:13 2009 +0200 Omit the wrong and (even if corrected) nowadays useless rm from autogen.sh. commit edfc2031e56f8a2ccda063f02936b3a848d88723 Author: Lasse Collin Date: Sat Feb 7 21:41:52 2009 +0200 Updated THANKS. commit 880c3309386aac58fc4f3d7ca99bd31bcb1526a3 Author: Lasse Collin Date: Sat Feb 7 21:17:07 2009 +0200 Make it easy to choose if command line tools should be linked statically or dynamically against liblzma. 
The default is still to use static liblzma, but it can now be changed by passing --enable-dynamic to configure. Thanks to Mike Frysinger for the original patch. Fixed a few minor bugs in configure.ac. commit 3f86532407e4ace3debb62be16035e009b56ca36 Author: Mike Frysinger Date: Fri Feb 6 23:38:39 2009 -0500 add gitignore files Signed-off-by: Mike Frysinger commit bd7ca1dad5c146b6217799ffaa230c32d207a3e5 Author: Lasse Collin Date: Sat Feb 7 17:07:52 2009 +0200 Assume 32 MiB of RAM on unsupported operating systems like the comment in hardware.c already said. commit d0ab8c1c73ae712adb0d26fbb9da762d99a63618 Author: Lasse Collin Date: Sat Feb 7 16:26:58 2009 +0200 MinGW support: Don't build fastpos_tablegen.c as part of liblzma. Build both static and dynamic liblzma, and also static and dynamic versions of the command line tools. commit bfd91198e44a52bd9bfe3cd6dcae5edab7c6eb45 Author: Lasse Collin Date: Sat Feb 7 15:55:47 2009 +0200 Support LZMA_API_STATIC in assembler files to avoid __declspec(dllexport) equivalent. commit 3306cf3883492720b3c34baa02f4eb4227d91c73 Author: Lasse Collin Date: Sat Feb 7 11:11:50 2009 +0200 Introduced LZMA_API_STATIC macro, which the applications need to #define when linking against static liblzma on platforms like Windows. Most developers don't need to care about LZMA_API_STATIC at all. commit b719e63c5f4c91d2d5e2ea585d4c055ec3767d0b Author: Lasse Collin Date: Fri Feb 6 16:55:45 2009 +0200 Another grammar fix commit fe5434f940f75fec3611cf9d9edf78c4da8ac760 Author: Lasse Collin Date: Fri Feb 6 12:30:23 2009 +0200 Grammar fix in README. commit 3dfa58a9eedf5a0e566452b078801c9cbcf7a245 Author: Lasse Collin Date: Fri Feb 6 10:06:32 2009 +0200 Some MSYS installations (e.g. MsysGit) don't include install.exe, so don't rely on it. commit 975d8fd72a5148d46b2e1745f7a211cf1dfd9d31 Author: Lasse Collin Date: Fri Feb 6 09:13:15 2009 +0200 Recreated the BCJ test files for x86 and SPARC. 
The old files were linked with crt*.o, which are copyrighted, and thus the old test files were not in the public domain as a whole. They are freely distributable though, but it is better to be careful and avoid including any copyrighted pieces in the test files. The new files are just compiled and assembled object files, and thus don't contain any copyrighted code. commit 094b1b09a531f0d201ec81f2b07346a995fd80b9 Author: Lasse Collin Date: Thu Feb 5 21:21:27 2009 +0200 Add the "windows" directory to EXTRA_DIST. commit e1c3412eec7acec7ca3b32c9c828f3147dc65b49 Author: Lasse Collin Date: Thu Feb 5 09:17:51 2009 +0200 Added initial experimental makefile for use with MinGW. commit 75905a9afc0ee89954ede7d08af70d1148bf0fd9 Author: Lasse Collin Date: Thu Feb 5 09:12:57 2009 +0200 Various code cleanups to the xz command line tool. It now builds with MinGW. commit d0c0b9e94e0af59d1d8f7f4829695d6efe19ccfe Author: Lasse Collin Date: Tue Feb 3 12:15:17 2009 +0200 Another utime() fix. commit ccf92a29e8c7234284f1568c1ec0fd7cb98356ca Author: Lasse Collin Date: Tue Feb 3 10:41:11 2009 +0200 Fix wrong filename argument for utime() and utimes(). This doesn't affect most systems, since most systems have better functions available. commit 99c1c2abfae2e87f3c17e929783e6d1bb7a3f302 Author: Lasse Collin Date: Mon Feb 2 21:19:01 2009 +0200 Updated the x86 assembler code: - Use call/ret pair to get instruction pointer for PIC. - Use PIC only if PIC or __PIC__ is #defined. - The code should work on MinGW and Darwin in addition to GNU/Linux and Solaris. commit 22a0c6dd940b78cdac2f4a4b4b0e7cc0ac15021f Author: Lasse Collin Date: Mon Feb 2 20:14:03 2009 +0200 Modify LZMA_API macro so that it works on Windows with other compilers than MinGW. This may hurt readability of the API headers slightly, but I don't know any better way to do this.
commit 8dd7b6052e18621e2e6c62f40f762ee88bd3eb65 Author: Lasse Collin Date: Sun Feb 1 22:40:35 2009 +0200 Fix a bug in lzma_block_buffer_decode(), although this function should be rewritten anyway. commit 55fd41431e61fb8178858283d636b6781e33e847 Author: Lasse Collin Date: Sun Feb 1 22:39:07 2009 +0200 Added initial version of raw buffer-to-buffer coding functions, and cleaned up filter.h API header a little. May be very buggy, not tested yet. commit 3e54ecee5cad30a5ca361a88a99230407abc0699 Author: Lasse Collin Date: Sun Feb 1 00:11:20 2009 +0200 Fix missing newlines in xzdec.c. commit d64ca34f1b6f34e86adefc7f735b4eff8e6d4a35 Author: Lasse Collin Date: Sun Feb 1 00:10:07 2009 +0200 Use __cdecl also for function pointers in liblzma API when on Windows. commit 6a2eb54092fc625d59921a607ff68cd1a90aa898 Author: Lasse Collin Date: Sat Jan 31 11:01:48 2009 +0200 Add LZMA_API to liblzma API headers. It's useful at least on Windows. sysdefs.h no longer #includes lzma.h, so lzma.h has to be #included separately where needed. commit d9993fcb4dfc1f93abaf31ae23b3ef1f3123892b Author: Lasse Collin Date: Sat Jan 31 10:13:09 2009 +0200 Use _WIN32 instead of WIN32 in xzdec.c to test if compiling on Windows. commit 2dbdc5befb33c3703e4609809101047c67caf343 Author: Lasse Collin Date: Sat Jan 31 10:02:52 2009 +0200 Fix two lines in lzma.h on which the # wasn't at the beginning of the line. commit 4ab760109106dc04f39dd81c97d50f528d1b51c1 Author: Lasse Collin Date: Sat Jan 31 09:55:05 2009 +0200 Add support for using liblzma headers in MSVC, which has no stdint.h or inttypes.h. commit b2172cf823d3be34cb0246cb4cb32d105e2a34c9 Author: Lasse Collin Date: Sat Jan 31 08:49:54 2009 +0200 Fix # -> ## in a macro in lzma.h. commit 1aae8698746d3c87a93f8398cdde2de9ba1f7208 Author: Lasse Collin Date: Fri Jan 30 18:50:16 2009 +0200 Updated README. commit f54bcf6f80d585236bc03ce49f7c73e1abaa17eb Author: Lasse Collin Date: Fri Jan 30 00:29:58 2009 +0200 Remove dangling crc64_init.c. 
commit 982da7ed314398420c38bf154a8f759d5f18b480 Author: Lasse Collin Date: Wed Jan 28 17:16:38 2009 +0200 The .xz file format specification version 1.0.0 is now officially released. The format has been technically the same since 2008-11-19, but now that it is frozen, people can start using it without a fear that the format will break. commit c4683a660b4372156bdaf92f0cdc54a58f95ee6f Author: Lasse Collin Date: Wed Jan 28 08:45:59 2009 +0200 Updated THANKS. commit 3241317093595db9f79104faafe93cb989c9f858 Author: Lasse Collin Date: Wed Jan 28 08:43:26 2009 +0200 Fix uninitialized variables in alone_decoder.c. This bug was triggered by the previous commit, since these variables were not used by anything before support for a preset dictionary. commit f76e39cf930f888d460b443d18f977ebedea8b2a Author: Lasse Collin Date: Tue Jan 27 18:36:05 2009 +0200 Added initial support for preset dictionary for raw LZMA1 and LZMA2. It is not supported by the .xz format or the xz command line tool yet. commit 449b8c832b26c3633f3bec60095e57d2d3ada1f3 Author: Lasse Collin Date: Mon Jan 26 20:09:17 2009 +0200 Regenerate the CRC tables without trailing blanks. commit 850f7400428dc9c5fd08a2f35a5bd2c9e45aede2 Author: Jim Meyering Date: Mon Jan 19 21:37:16 2009 +0100 remove trailing blanks from all but .xz files commit 667481f1aad34e1ed15738e7913a9c7e256b4cf5 Author: Lasse Collin Date: Mon Jan 26 14:34:10 2009 +0200 Add lzma_block_buffer_decode(). commit 5fb34d8324d3e7e0061df25d0086b64c8726b19d Author: Lasse Collin Date: Mon Jan 26 14:33:28 2009 +0200 Add more sanity checks to lzma_stream_buffer_decode(). commit c129748675a5daa8838df92bde32cc04f6ce61ba Author: Lasse Collin Date: Mon Jan 26 14:33:13 2009 +0200 Avoid hardcoded constant in easy.c. 
commit 1859d22d75e072463db74c25bc3f5a7992e5fdf6 Author: Lasse Collin Date: Mon Jan 26 13:06:49 2009 +0200 Tiny bit better sanity check in block_util.c commit 2c5fe958e4bbe9b147b10c255955dfe2827fb8e7 Author: Lasse Collin Date: Sun Jan 25 01:35:56 2009 +0200 Fix a dumb bug in Block decoder, which made it return LZMA_DATA_ERROR with valid data. The bug was added in e114502b2bc371e4a45449832cb69be036360722. commit c81f13ff29271de7293f8af3d81848b1dcae3d19 Author: Lasse Collin Date: Fri Jan 23 22:27:50 2009 +0200 Added lzma_stream_buffer_decode() and made minor cleanups. commit 0b3318661ce749550b8531dfd469639a08930391 Author: Lasse Collin Date: Thu Jan 22 12:53:33 2009 +0200 Fix a comment. commit 9ec80355a7212a0a2f8c89d98e51b1d8b4e34eec Author: Lasse Collin Date: Tue Jan 20 16:37:27 2009 +0200 Add some single-call buffer-to-buffer coding functions. commit d8b58d099340f8f4007b24b211ee41a7210c061c Author: Lasse Collin Date: Tue Jan 20 13:45:41 2009 +0200 Block encoder cleanups commit 0c09810cb3635cb575cb54e694d41523e7d0a335 Author: Lasse Collin Date: Tue Jan 20 10:35:15 2009 +0200 Use LZMA_PROG_ERROR in lzma_code() as documented in base.h. commit 2f1a8e8eb898f6c036cde55d153ad348bfab3c00 Author: Lasse Collin Date: Mon Jan 19 22:53:18 2009 +0200 Fix handling of non-fatal errors in lzma_code(). commit 4810b6bc25087be872960b9dd1d11ff07735dc88 Author: Lasse Collin Date: Mon Jan 19 14:00:33 2009 +0200 Move some LZMA2 constants to lzma2_encoder.h so that they can be used outside lzma2_encoder.c. commit 00be5d2e09f9c7a6a8563465ad8b8042866817a4 Author: Lasse Collin Date: Mon Jan 19 13:52:36 2009 +0200 Remove dead code. commit 128586213f77c9bd82b7e9a62927f6d0c3769d85 Author: Lasse Collin Date: Sat Jan 17 14:24:25 2009 +0200 Beta was supposed to be API stable but I had forgot to rename lzma_memlimit_encoder and lzma_memlimit_decoder to lzma_raw_encoder_memlimit and lzma_raw_decoder_memlimit. :-( Now it is fixed. 
Hopefully it doesn't cause too much trouble to those who already thought API is stable. commit b056379490be5c584c264a967f0540041a163a1e Author: Lasse Collin Date: Thu Jan 15 14:29:22 2009 +0200 Updated THANKS. commit dc8f3be06d54ef6e6cfb5134dd3d25edd08cef89 Author: Lasse Collin Date: Thu Jan 15 14:27:32 2009 +0200 Fixed a bug in 7z2lzma.bash to make it work with .7z files that use something else than 2^n as the dictionary size. Thanks to Dan Shechter for the bug report. commit 8286a60b8f4bd5accfbc9d229d2204bac31994f2 Author: Lasse Collin Date: Wed Jan 7 18:41:15 2009 +0200 Use pthread_sigmask() instead of sigprocmask() when pthreads are enabled. commit 4fd43cb3a906f6da2943f69239ee984c4787c9a9 Author: Lasse Collin Date: Wed Dec 31 20:01:00 2008 +0200 Bumped version to 4.999.8beta right after the release of 4.999.7beta. commit 061748f5932719643cda73383db715167d543c22 Author: Lasse Collin Date: Wed Dec 31 18:59:02 2008 +0200 Disable Subblock filter from test_compress.sh since it is disabled by default in configure.ac. commit 9c45658ddc8bd4a7819ef8547d3e7ccf73203e78 Author: Lasse Collin Date: Wed Dec 31 17:44:20 2008 +0200 Disable both Subblock encoder and decoder by default, since they are not finished and may have security issues too. commit b59f1e98f50694cf6a8f1b342fd878feebdb2f88 Author: Lasse Collin Date: Wed Dec 31 17:42:50 2008 +0200 Update some files in debug directory. commit d1d17a40d33a9682424ca37282813492f2cba6d0 Author: Lasse Collin Date: Wed Dec 31 17:41:46 2008 +0200 Prepare for 4.999.7beta release. commit 88d3e6b0b18e24142b6d3b41dc1b84b00c49fef3 Author: Lasse Collin Date: Wed Dec 31 17:15:03 2008 +0200 Cleaned up some comments in the API headers. commit 322ecf93c961e45a1da8c4a794a7fdacefcd7f40 Author: Lasse Collin Date: Wed Dec 31 16:29:39 2008 +0200 Renamed lzma_options_simple to lzma_options_bcj in the API. The internal implementation is still using the name "simple". It may need some cleanups, so I look at it later.
commit 7eea8bec3abfed883efba66264a1452a1c04f6b0 Author: Lasse Collin Date: Wed Dec 31 00:57:27 2008 +0200 Fixed missing quoting in configure.ac. commit 28e75f7086dbe9501d926c370375c69dfb1236ce Author: Lasse Collin Date: Wed Dec 31 00:48:23 2008 +0200 Updated src/liblzma/Makefile.am to use liblzma.pc.in, which should have been in the previous commit. commit 7ed9d943b31d3ee9c5fb2387e84a241ba33afe90 Author: Lasse Collin Date: Wed Dec 31 00:30:49 2008 +0200 Remove lzma_init() and other init functions from liblzma API. Half of developers were already forgetting to use these functions, which could have caused total breakage in some future liblzma version or even now if --enable-small was used. Now liblzma uses pthread_once() to do the initializations unless it has been built with --disable-threads which make these initializations thread-unsafe. When --enable-small isn't used, liblzma currently gets needlessly linked against libpthread (on systems that have it). While it is stupid for now, liblzma will need threads in future anyway, so this stupidity will be temporary only. When --enable-small is used, different code CRC32 and CRC64 is now used than without --enable-small. This made the resulting binary slightly smaller, but the main reason was to clean it up and to handle the lack of lzma_init_check(). The pkg-config file lzma.pc was renamed to liblzma.pc. I'm not sure if it works correctly and portably for static linking (Libs.private includes -pthread or other operating system specific flags). Hopefully someone complains if it is bad. lzma_rc_prices[] is now included as a precomputed array even with --enable-small. It's just 128 bytes now that it uses uint8_t instead of uint32_t. Smaller array seemed to be at least as fast as the more bloated uint32_t array on x86; hopefully it's not bad on other architectures. 
commit 5cda29b5665004fc0f21d0c41d78022a6a559ab2 Author: Lasse Collin Date: Sat Dec 27 19:40:31 2008 +0200 Use 28 MiB as memory usage limit for encoding in test_compress.sh. commit 050eb14d29e2537c014662e83599fd8a77f13c45 Author: Lasse Collin Date: Sat Dec 27 19:32:20 2008 +0200 Revert a change made in 3b34851de1eaf358cf9268922fa0eeed8278d680 that was related to LZMA_MODE_FAST. The original code is slightly faster although it compresses slightly worse. But since it is fast mode, it is better to select the faster version. commit 4820f10d0f173864f6a2ea7479663b509ac53358 Author: Lasse Collin Date: Sat Dec 27 19:30:19 2008 +0200 Some xz command line tool improvements. commit e33194e79d8f5ce07cb4aca909b324ae75098f7e Author: Lasse Collin Date: Sat Dec 27 19:27:49 2008 +0200 Bunch of liblzma tweaks, including some API changes. The API and ABI should now be very close to stable, although the code behind it isn't yet. commit 4d00652e75dd2736aedc3a3a8baff3dd0ea38074 Author: Lasse Collin Date: Thu Dec 18 13:42:52 2008 +0200 Updated Makefile.am that was missing from the previous commit. commit 634636fa56ccee6e744f78b0abed76c8940f2f8f Author: Lasse Collin Date: Wed Dec 17 21:49:53 2008 +0200 Remove the alignment functions for now. Maybe they will be added back in some form later, but the current version wasn't modular, so it would need fixing anyway. commit 4fed98417d1687f5eccccb42a133fde3ec81216a Author: Lasse Collin Date: Wed Dec 17 20:11:23 2008 +0200 xz message handling improvements commit 653e457e3756ef35e5d1b2be3523b3e4b1e9ee4d Author: Lasse Collin Date: Mon Dec 15 23:26:43 2008 +0200 Fix a dumb bug in .lzma decoder which was introduced in the previous commit. (Probably the previous commit has other bugs too, it wasn't tested.) commit 671a5adf1e844bfdd6fd327016c3c28694493158 Author: Lasse Collin Date: Mon Dec 15 19:39:13 2008 +0200 Bunch of liblzma API cleanups and fixes. 
commit 17781c2c20fd77029cb32e77792889f2f211d69d Author: Lasse Collin Date: Mon Dec 15 14:26:52 2008 +0200 The LZMA2 decoder fix introduced a bug to LZ decoder, which made LZ decoder return too early after dictionary reset. This fixes it. commit f9f2d1e74398500724041f7fb3c38db35ad8c8d8 Author: Lasse Collin Date: Mon Dec 15 11:20:22 2008 +0200 Added two new test files. commit ff7fb2c605bccc411069e07b9f11fb957aea2ddf Author: Lasse Collin Date: Mon Dec 15 10:01:59 2008 +0200 Fix data corruption in LZMA2 decoder. commit 1ceebcf7e1bd30b95125f0ad67a09fdb6215d613 Author: Lasse Collin Date: Sat Dec 13 00:54:11 2008 +0200 Name the package "xz" in configure.ac. commit a94bf00d0af9b423851905b031be5a645a657820 Author: Lasse Collin Date: Fri Dec 12 22:43:21 2008 +0200 Some adjustments to GCC warning flags. The important change is the removal of -pedantic. It messes up -Werror (which I really want to keep so that I don't miss any warnings) with printf format strings that are in POSIX but not in C99. commit 8582d392baacd2cdac07ca60041f8c661323676d Author: Lasse Collin Date: Wed Dec 10 01:31:00 2008 +0200 Remove obsolete comment. commit b1ae6dd731ea3636c3c2bfc7aefa71457d3328f1 Author: Lasse Collin Date: Wed Dec 10 01:27:15 2008 +0200 Use "decompression" consistently in --long-help. commit 1ea9e7f15afd5d3981e2432710e932320597bca9 Author: Lasse Collin Date: Wed Dec 10 01:23:58 2008 +0200 Added preset=NUM to --lzma1 and --lzma2. This makes it easy to take a preset as a template and modify it a little. commit bceb3918dbb21f34976bfdd4c171a81319de71f7 Author: Lasse Collin Date: Tue Dec 9 17:43:31 2008 +0200 Put the file format specification into the public domain. Same will be done to the actual code later. commit 6efa2d80d46a38861016f41f0eb6fa2ec9260fe6 Author: Lasse Collin Date: Tue Dec 9 17:41:49 2008 +0200 Make the memusage functions of LZMA1 and LZMA2 encoders to validate the filter options. Add missing validation to LZMA2 encoder when options are changed in the middle of encoding. 
commit f20a03206b71ff01b827bb7a932411d6a6a4e06a Author: Lasse Collin Date: Tue Dec 9 10:36:24 2008 +0200 Updated THANKS. commit ef7890d56453dca1aeb2e12db29b7e418d93dde4 Author: Lasse Collin Date: Mon Dec 1 23:04:12 2008 +0200 In command line tool, take advantage of memusage calculation's ability to also validate the filter chain and options (not implemented yet for all filters). commit ccd57afa09e332d664d6d6a7498702791ea5f659 Author: Lasse Collin Date: Mon Dec 1 22:59:28 2008 +0200 Validate the filter chain before checking filter-specific memory usage. commit c596fda40b62fe1683d0ac34d0c673dcaae2aa15 Author: Lasse Collin Date: Mon Dec 1 22:58:22 2008 +0200 Make the memusage functions of LZMA1 and LZMA2 decoders to validate the filter options. commit c58f469be5bb9b0bdab825c6687445fd553f4f3a Author: Lasse Collin Date: Mon Dec 1 22:55:18 2008 +0200 Added the changes for Delta filter that should have been part of 656ec87882ee74b192c4ea4a233a235eca7b04d4. commit cd708015202dbf7585b84a8781462a20c42a324b Author: Lasse Collin Date: Mon Dec 1 22:50:28 2008 +0200 LZMA2 decoder cleanups. Make it require new LZMA properties also in the first LZMA chunk after a dictionary reset in uncompressed chunk. commit 656ec87882ee74b192c4ea4a233a235eca7b04d4 Author: Lasse Collin Date: Mon Dec 1 16:30:11 2008 +0200 Added lzma_delta_coder_memusage() which also validates the options. commit 691a9155b7a28882baf37e9d1e969e32e91dbc7a Author: Lasse Collin Date: Sat Nov 29 10:03:49 2008 +0200 Automake includes the m4 directory, so don't add it in Makefile.am separately. Updated THANKS. commit c7007ddf06ac2b0e018d71d281c21b99f16e7ae0 Author: Lasse Collin Date: Fri Nov 28 12:00:48 2008 +0200 Tested using COLUMNS environment variable to avoid broken progress indicator but since COLUMNS isn't usually available, the code was left commented out. commit ae65dcfde27014e4d811e1a1308aa5d0fe8debbd Author: Lasse Collin Date: Thu Nov 27 19:28:59 2008 +0200 Cleanups to message.c. 
commit a8368b75cdcd5427299001cc42839287f27b244d Author: Lasse Collin Date: Tue Nov 25 02:37:47 2008 +0200 Remove the nowadays unneeded memory limiting malloc() wrapper. commit 69472ee5f055a2bb6f28106f0923e1461fd1d080 Author: Lasse Collin Date: Sun Nov 23 15:09:03 2008 +0200 VLI encoder and decoder cleanups. Made encoder return LZMA_PROG_ERROR in single-call mode if there's no output space. commit 4249c8c15a08f55b51b7012e6aaafce3aa9eb650 Author: Lasse Collin Date: Sat Nov 22 17:44:33 2008 +0200 Typo fix commit 6d1d6f4598d121253dbe1084c6866b66e95c361b Author: Lasse Collin Date: Thu Nov 20 22:59:10 2008 +0200 Support NetBSD's errno for O_NOFOLLOW. commit f901a290eef67b8ea4720ccdf5f46edf775ed9d7 Author: Lasse Collin Date: Thu Nov 20 18:05:52 2008 +0200 Build xzdec and lzmadec from xzdec.c. xzdec supports only .xz files and lzmadec only .lzma files. commit 86a0ed8f01c8ed44721223f885e679c71b7bb94c Author: Lasse Collin Date: Thu Nov 20 11:01:29 2008 +0200 Minor cleanups to xzdec. commit 54f716ba8905d09752dcd1519455a40bd21d5317 Author: Lasse Collin Date: Wed Nov 19 23:55:22 2008 +0200 Added missing check for uint16_t. commit 1880a3927b23f265f63b2adb86fbdb81ea09eb06 Author: Lasse Collin Date: Wed Nov 19 23:52:24 2008 +0200 Renamed lzma to xz and lzmadec to xzdec. We create symlinks lzma, unlzma, and lzcat in "make install" for backwards compatibility with LZMA Utils 4.32.x; I'm not sure if this should be the default though. commit e114502b2bc371e4a45449832cb69be036360722 Author: Lasse Collin Date: Wed Nov 19 20:46:52 2008 +0200 Oh well, big messy commit again. Some highlights: - Updated to the latest, probably final file format version. - Command line tool reworked to not use threads anymore. Threading will probably go into liblzma anyway. - Memory usage limit is now about 30 % for uncompression and about 90 % for compression. - Progress indicator with --verbose - Simplified --help and full --long-help - Upgraded to the last LGPLv2.1+ getopt_long from gnulib.
- Some bug fixes commit 3c3905b53462ae235c9438d86a4dc51086410932 Author: Lasse Collin Date: Thu Oct 9 11:12:29 2008 +0300 Fixed the test that should have been fixed as part of 1e8e4fd1f3e50129b4541406ad765d2aa1233943. commit 0f295bf7a3ece01f667caae318cc3e3424085886 Author: Lasse Collin Date: Tue Oct 7 16:42:18 2008 +0300 Fixed some help messages. commit 1e8e4fd1f3e50129b4541406ad765d2aa1233943 Author: Lasse Collin Date: Tue Oct 7 09:40:31 2008 +0300 Made the preset numbering more logical in liblzma API. commit 5e4df4c3c09c82bbbb1a916784e3dc717ca4ff81 Author: Lasse Collin Date: Fri Oct 3 19:36:09 2008 +0300 Removed fi from po/LINGUAS. commit fcfb86c7770328cfffa2e83b176af9a1ba2d9128 Author: Lasse Collin Date: Fri Oct 3 07:06:48 2008 +0300 Fixed suffix handling with --format=raw. commit bd137524f2f50e30ba054f42f1f6536cd3cee920 Author: Lasse Collin Date: Thu Oct 2 22:51:46 2008 +0300 Initial changes to change the suffix of the new format to .xz. This also fixes a bug related to --suffix option. Some issues with suffixes with --format=raw were not fixed. commit 4c321a41c482821aa3c4d64cdf886a6ed904d844 Author: Lasse Collin Date: Tue Sep 30 17:43:55 2008 +0300 Renamed the test files from .lzma suffix to .xz suffix. commit 8e60c889a2816a63013a35c99ce26bf28f5b78eb Author: Lasse Collin Date: Tue Sep 30 13:57:44 2008 +0300 Fixed Stream decoder to actually use the first_stream variable. commit 3bdbc12c054d1961133ee19802af7dd3c3494543 Author: Lasse Collin Date: Tue Sep 30 13:56:57 2008 +0300 Added one more test file. commit a6639022fdc536e5659b070a465221b4cf7c51fa Author: Lasse Collin Date: Tue Sep 30 13:34:07 2008 +0300 Fixed uninitialized variable in Stream decoder. commit ed3709000a3f17ecefab29b2235d7e2221b00003 Author: Lasse Collin Date: Tue Sep 30 13:27:28 2008 +0300 Added two test files. commit ea560b0ea80525752bdcd0074d24f8dc170bbe29 Author: Lasse Collin Date: Sat Sep 27 23:49:24 2008 +0300 Fix conflicting Subblock helper filter's ID. 
commit ad97483b6e55142fd8d5c041db057017a891cd95 Author: Lasse Collin Date: Sat Sep 27 23:37:13 2008 +0300 Changed magic bytes to match the updated spec. Filename suffix wasn't changed yet. commit 7a57069167e9e63394e2b095ee3a63253fcb51c7 Author: Lasse Collin Date: Sat Sep 27 23:16:09 2008 +0300 Remove po/fi.po since I'm not keeping it updated for now. commit 018ae09df8f2fee5a7374f307df4cb42fad0b81e Author: Lasse Collin Date: Sat Sep 27 23:13:54 2008 +0300 Fix also test_compress.sh. commit 3a62a5fb85d2eebd8666e64ed5d364d095062858 Author: Lasse Collin Date: Sat Sep 27 23:01:15 2008 +0300 Fixed compilation of test_filter_flags.c, which was broken by 1dcecfb09b55157b8653d747963069c8bed74f04. commit c6ca26eef7cd07eba449035514e2b8f9ac3111c0 Author: Lasse Collin Date: Sat Sep 27 19:11:02 2008 +0300 Updated file format specification. It changes the suffix of the new format to .xz and removes the recently added LZMA filter. commit 1dcecfb09b55157b8653d747963069c8bed74f04 Author: Lasse Collin Date: Sat Sep 27 19:09:21 2008 +0300 Some API changes, bug fixes, cleanups etc. commit 5cc5064cae603b649c64c40125c7dd365de54c9d Author: Lasse Collin Date: Sat Sep 27 11:28:49 2008 +0300 Added 7z2lzma.bash. commit f147666a5cd15542d4e427da58629f4a71cc38e1 Author: Lasse Collin Date: Wed Sep 17 22:11:39 2008 +0300 Miscellaneous LZ and LZMA encoder cleanups commit 13d68b069849e19c33822cd8996cd6447890abb1 Author: Lasse Collin Date: Sat Sep 13 13:54:00 2008 +0300 LZ decoder cleanup commit 13a74b78e37f16c9096ba5fe1859cc04eaa2f9f7 Author: Lasse Collin Date: Sat Sep 13 12:10:43 2008 +0300 Renamed constants: - LZMA_VLI_VALUE_MAX -> LZMA_VLI_MAX - LZMA_VLI_VALUE_UNKNOWN -> LZMA_VLI_UNKNOWN - LZMA_HEADER_ERROR -> LZMA_OPTIONS_ERROR commit 320601b2c7b08fc7da9da18d5bf7c3c1a189b080 Author: Lasse Collin Date: Fri Sep 12 22:41:40 2008 +0300 Improved the Stream Flags handling API.
commit ec490da5228263b25bf786bb23d1008468f55b30 Author: Lasse Collin Date: Thu Sep 11 23:10:44 2008 +0300 Simplified debug/known_sizes.c to match the relaxed requirements of Block encoder. commit 16e8b98f2659347edfa74afdbbb9e73311153cb9 Author: Lasse Collin Date: Thu Sep 11 23:09:24 2008 +0300 Remove a check from Block encoder that should have already been removed in 2ba01bfa755e47ff6af84a978e3c8d63d7d2775e. commit 5a710c3805bdf6d7e3c92e954e4e4565b27bcb13 Author: Lasse Collin Date: Thu Sep 11 20:02:38 2008 +0300 Remove bogus #includes. commit 01892b2ca5f69bed0ea746e04b604030d57806bb Author: Lasse Collin Date: Thu Sep 11 10:49:14 2008 +0300 Updated THANKS. commit 962f2231d49409fe6852e44ffe8c5dbabb04bc7d Author: Lasse Collin Date: Thu Sep 11 10:48:12 2008 +0300 Fix a compiler error on big endian systems that don't support unaligned memory access. commit fa3ab0df8ae7a8a1ad55b52266dc0fd387458671 Author: Lasse Collin Date: Thu Sep 11 10:46:14 2008 +0300 Silence a compiler warning. commit 9373e81e18822db4972819442ea4c2cb9955470b Author: Lasse Collin Date: Wed Sep 10 19:16:32 2008 +0300 Bumped version to 4.999.6alpha. commit cb072b7c8442ba68bb0c62c0abbbe939794887a3 Author: Lasse Collin Date: Wed Sep 10 17:02:00 2008 +0300 Check for LZMA_FILTER_RESERVED_START in filter_flags_encoder.c. Use LZMA_PROG_ERROR instead of LZMA_HEADER_ERROR if the Filter ID is in the reserved range. This allows Block Header encoder to detect unallowed Filter IDs, which is good for Stream encoder. commit 123ab0acec435c9e9866a99e30482116cfbd9ba5 Author: Lasse Collin Date: Wed Sep 10 16:44:32 2008 +0300 Filter handling cleanups commit 9cfcd0c4f2f865d8fbbb46ea28344a9be0dd8ad1 Author: Lasse Collin Date: Wed Sep 10 00:33:00 2008 +0300 Comments commit 2ba01bfa755e47ff6af84a978e3c8d63d7d2775e Author: Lasse Collin Date: Wed Sep 10 00:27:02 2008 +0300 Cleaned up Block encoder and moved the no longer shared code from block_private.h to block_decoder.c. 
Now the Block encoder doesn't need compressed_size and uncompressed_size from lzma_block structure to be initialized. commit 07efcb5a6bc5d7018798ebd728586f84183e7d64 Author: Lasse Collin Date: Sun Sep 7 10:23:13 2008 +0300 Changed Filter ID of LZMA to 0x20. commit 32fe5fa541e82c08e054086279079ae5016bd8d8 Author: Lasse Collin Date: Sat Sep 6 23:42:50 2008 +0300 Comments commit 0a31ed9d5e3cde4feb094b66f3a8b2c074605d84 Author: Lasse Collin Date: Sat Sep 6 15:14:30 2008 +0300 Some API cleanups commit da98df54400998be2a6c3876f9655a3c51b93c10 Author: Lasse Collin Date: Thu Sep 4 11:53:06 2008 +0300 Added support for raw encoding and decoding to the command line tool, and made various cleanups. --lzma was renamed to --lzma1 to prevent people from accidentally using LZMA when they want LZMA2. commit 2496aee8a7741a8a0d42987db41ff2cf1a4bdabd Author: Lasse Collin Date: Thu Sep 4 10:39:15 2008 +0300 Don't allow LZMA_SYNC_FLUSH with decoders anymore. There's simply nothing that would use it. Allow LZMA_FINISH to the decoders, which will usually ignore it (auto decoder and Stream decoder being exceptions). commit bea301c26d5d52675e11e0236faec0492af98f60 Author: Lasse Collin Date: Wed Sep 3 17:06:25 2008 +0300 Minor updates to the file format specification. commit 9c75b089b4a9e0edcf4cf7970a4383768707d6c8 Author: Lasse Collin Date: Tue Sep 2 19:33:32 2008 +0300 Command line tool fixes commit bab0590504b5aeff460ab4ca8c964dd7c1bad9e4 Author: Lasse Collin Date: Tue Sep 2 19:31:42 2008 +0300 Auto decoder cleanup commit 689602336d126a46b60d791a67decab65e1e81f5 Author: Lasse Collin Date: Tue Sep 2 19:12:12 2008 +0300 Updated auto decoder to handle LZMA_CONCATENATED when decoding LZMA_Alone files. Decoding of concatenated LZMA_Alone files is intentionally not supported, so it is better to put this in auto decoder than LZMA_Alone decoder. 
commit 80c4158f19904026433eb6f5d5ca98a0ecd4f66c Author: Lasse Collin Date: Tue Sep 2 14:56:52 2008 +0300 Stream decoder cleanups commit fc681657450ce57be1fe08f7a15d31dcc705e514 Author: Lasse Collin Date: Tue Sep 2 11:45:39 2008 +0300 Some fixes to LZ encoder. commit ede675f9ac1ca82a7d7c290324adba672118bc8d Author: Lasse Collin Date: Sun Aug 31 11:47:01 2008 +0300 Fix wrong pointer calculation in LZMA encoder. commit 3b34851de1eaf358cf9268922fa0eeed8278d680 Author: Lasse Collin Date: Thu Aug 28 22:53:15 2008 +0300 Sort of garbage collection commit. :-| Many things are still broken. API has changed a lot and it will still change a little more here and there. The command line tool doesn't have all the required changes to reflect the API changes, so it's easy to get "internal error" or trigger assertions. commit 57b9a145a527f0716822615e5ed536d33aebd3fc Author: Lasse Collin Date: Fri Jun 20 17:16:32 2008 +0300 Fix test_filter_flags to match the new restriction of lc+lp. commit eaafc4367c77ec1d910e16d11b4da293969d97a3 Author: Lasse Collin Date: Fri Jun 20 16:19:54 2008 +0300 Remove some redundant code from LZMA encoder. commit 0809c46534fa5664fe35d9e98d95e87312ed130e Author: Lasse Collin Date: Thu Jun 19 16:35:08 2008 +0300 Add limit of lc + lp <= 4. Now we can allocate the literal coder as part of the main LZMA encoder or decoder structure. Make the LZMA decoder to rely on the current internal API to free the allocated memory in case an error occurs. commit d25ab1b96178f06a0e724f58e3cd68300b2b1275 Author: Lasse Collin Date: Wed Jun 18 21:45:19 2008 +0300 Comments commit 6368a2fa5901c75864be5171dd57a50af7adbb41 Author: Lasse Collin Date: Wed Jun 18 19:19:02 2008 +0300 Delete old code that was supposed to be already deleted from test_block_header.c. commit 7d17818cec8597f847b0a2537fde991bbc3d9e96 Author: Lasse Collin Date: Wed Jun 18 18:02:10 2008 +0300 Update the code to mostly match the new simpler file format specification. 
Simplify things by removing most of the support for known uncompressed size in most places. There are some miscellaneous changes here and there too. The API of liblzma has got many changes and still some more will be done soon. While most of the code has been updated, some things are not fixed (the command line tool will choke with invalid filter chain, if nothing else). Subblock filter is somewhat broken for now. It will be updated once the encoded format of the Subblock filter has been decided. commit bf6348d1a3ff09fdc06940468f318f75ffa6af11 Author: Lasse Collin Date: Tue Jun 17 15:03:46 2008 +0300 Update the file format specification draft. The new one is a lot simpler than the previous versions, but it also means that the existing code will change a lot. commit 803194ddd26f01ff60ba4e9924c6087a56b29827 Author: Lasse Collin Date: Wed Jun 11 21:42:47 2008 +0300 Fix uninitialized variable in LZMA encoder. This was introduced in 369f72fd656f537a9a8e06f13e6d0d4c242be22f. commit 0ea98e52ba87453497b1355c51f13bad55c8924a Author: Lasse Collin Date: Wed Jun 11 15:08:44 2008 +0300 Improve command line integer parsing a little in lzma and lzmadec to make them accept also KiB in addition to Ki etc. Fix also memory usage information in lzmadec --help. commit 436fa5fae96d4e35759aed33066060f09ee8c6ef Author: Lasse Collin Date: Tue Jun 10 20:36:12 2008 +0300 s/decompressed/compressed/ in the command line tool's error message. commit 369f72fd656f537a9a8e06f13e6d0d4c242be22f Author: Lasse Collin Date: Sun Jun 1 12:48:17 2008 +0300 Fix a buffer overflow in the LZMA encoder. It was due to my misunderstanding of the code. There's no tiny fix for this problem, so I also cleaned up the code in general. This reduces the speed of the encoder 2-5 % in the fastest compression mode ("lzma -1"). High compression modes should have no noticeable performance difference.
This commit breaks things (especially LZMA_SYNC_FLUSH) but I will fix them once the new format and LZMA2 has been roughly implemented. Plain LZMA won't support LZMA_SYNC_FLUSH at all and won't be supported in the new .lzma format. This may change still but this is what it looks like now. Support for known uncompressed size (that is, LZMA or LZMA2 without EOPM) is likely to go away. This means there will be API changes. commit e55e0e873ce2511325749d415ae547d62ab5f00d Author: Lasse Collin Date: Fri May 30 11:53:41 2008 +0300 Typo fixes from meyering. commit ed6664146fcbe9cc4a3b23b31632182ed812ea93 Author: Lasse Collin Date: Sun May 11 14:24:42 2008 +0300 Remove support for pre-C89 libc versions that lack memcpy, memmove, and memset. commit b09464bf9ae694afc2d1dc26188ac4e2e8af0a63 Author: Lasse Collin Date: Sun May 11 14:17:21 2008 +0300 Improved C99 compiler detection in configure.ac. It will pass -std=gnu99 instead of -std=c99 to GCC now, but -pedantic should still give warnings about GNU extensions like before except with some special keywords like asm(). commit 11de5d5267f7a0a7f0a4d34eec147e65eaf9f9cf Author: Lasse Collin Date: Tue May 6 15:15:07 2008 +0300 Bunch of grammar fixes from meyering. commit dc192b6343ae36276c85fcf7ef6006147816eadc Author: Lasse Collin Date: Tue May 6 13:41:05 2008 +0300 Typo fix commit 944b62b93239b27b338d117f2668c0e95849659b Author: Lasse Collin Date: Sun May 4 22:29:27 2008 +0300 Don't print an error message on broken pipe unless --verbose is used. commit 8e074349e47ea6832b8fdf9244e581d453733433 Author: Lasse Collin Date: Wed Apr 30 22:16:17 2008 +0300 Fix a crash with --format=alone if other filters than LZMA are specified on the command line. commit 2f361ac19b7fd3abcd362de4d470e6a9eb495b73 Author: Lasse Collin Date: Mon Apr 28 17:08:27 2008 +0300 Updated THANKS. commit 3be21fb12f4cec2cf07799e8960382f4cb375369 Author: Lasse Collin Date: Mon Apr 28 17:06:34 2008 +0300 Fixed wrong spelling "limitter" to "limiter". 
This affects liblzma's API. commit beeb81060821dfec4e7898e0d44b7900dcb2215e Author: Lasse Collin Date: Fri Apr 25 15:39:50 2008 +0300 Prevent LZ encoder from hanging with known uncompressed size. The "fix" breaks LZMA_SYNC_FLUSH at end of stream with known uncompressed size, but since it currently seems likely that support for encoding with known uncompressed size will go away anyway, I'm not fixing this problem now. commit c324325f9f13cdeb92153c5d00962341ba070ca2 Author: Lasse Collin Date: Fri Apr 25 13:58:56 2008 +0300 Removed src/liblzma/common/sysdefs.h symlink, which was annoying, because "make dist" put two copies of sysdefs.h into the tarball instead of the symlink. commit d3ba30243c75c13d094de1793f9c58acdbacc692 Author: Lasse Collin Date: Fri Apr 25 13:41:29 2008 +0300 Added memusage.c to debug directory. commit 8f804c29aa8471ccd6438ddca254092b8869ca52 Author: Lasse Collin Date: Fri Apr 25 13:32:35 2008 +0300 Bumped version number to 4.999.3alpha. It will become 5.0.0 once we have a stable release (won't be very soon). The version number is no longer related to version of LZMA SDK. Made some small Automake-related changes to toplevel Makefile.am and configure.ac. commit c99037ea10f121cbacf60c37a36c29768ae53447 Author: Lasse Collin Date: Thu Apr 24 20:25:39 2008 +0300 Fix a memory leak by calling free(extra->data) in lzma_extra_free(). commit 22ba3b0b5043fa481903482ce85015fe775939e5 Author: Lasse Collin Date: Thu Apr 24 20:23:05 2008 +0300 Make unlzma and lzcat symlinks. commit 17c36422d4cbc2c70d5c83ec389406f92cd9e85e Author: Lasse Collin Date: Thu Apr 24 20:20:27 2008 +0300 Fixed a bug in command line option parsing. commit 283f939974c32c47f05d495e8dea455ec646ed64 Author: Lasse Collin Date: Thu Apr 24 20:19:20 2008 +0300 Added two assert()s. commit eb348a60b6e19a7c093f892434f23c4756973ffd Author: Lasse Collin Date: Thu Apr 24 19:22:53 2008 +0300 Switch to uint16_t as the type of range coder probabilities. 
commit 6c5306e312bcfd254cf654f88c04e34ba786df3d Author: Lasse Collin Date: Thu Apr 24 18:39:57 2008 +0300 Fix wrong return type (uint32_t -> bool). commit 712cfe3ebfd24df24d8896b1315c53c3bc4369c8 Author: Lasse Collin Date: Thu Apr 24 18:38:00 2008 +0300 Fix data corruption in LZ encoder with LZMA_SYNC_FLUSH. commit bc04486e368d20b3027cde625267762aae063965 Author: Lasse Collin Date: Thu Apr 24 17:33:01 2008 +0300 Fix fastpos problem in Makefile.am when built with --enable-small. commit 7ab493924e0ed590a5121a15ee54038d238880d3 Author: Lasse Collin Date: Thu Apr 24 17:30:51 2008 +0300 Use 64-bit integer as range encoder's cache size. This fixes a theoretical data corruption, which should be very hard to trigger even intentionally. commit 641998c3e1ecc8b598fe0eb051fab8b9535c291b Author: Lasse Collin Date: Mon Mar 24 16:38:40 2008 +0200 Replaced the range decoder optimization that used arithmetic right shift with as fast version that doesn't need arithmetic right shift. Removed the related check from configure.ac. commit ad999efd279d95f1e7ac555b14170e8e9020488c Author: Lasse Collin Date: Sat Mar 22 14:39:34 2008 +0200 Take advantage of arithmetic right shift in range decoder. commit 03e0e8a0d7228b6ff1f0af39e2c040a4e425973d Author: Lasse Collin Date: Sat Mar 22 14:18:29 2008 +0200 Added autoconf check to detect if we can use arithmetic right shift for optimizations. commit 7521bbdc83acab834594a22bec50c8e1bd836298 Author: Lasse Collin Date: Sat Mar 22 01:26:36 2008 +0200 Update a comment to use the variable name rep_len_decoder. (And BTW, the previous commit actually did change the program logic slightly.) commit 63b74d000eedaebb8485f623e56864ff5ab71064 Author: Lasse Collin Date: Sat Mar 22 00:57:33 2008 +0200 Demystified the "state" variable in LZMA code. Use the word literal instead of char for better consistency. There are still some names with _char instead of _literal in lzma_optimum, these may be changed later. Renamed length coder variables. 
This commit doesn't change the program logic. commit e6eb0a26757e851cef62b9440319a8e73b015cb9 Author: Lasse Collin Date: Fri Mar 14 23:16:11 2008 +0200 Fix data corruption in LZMA encoder. Note that this bug was specific to liblzma and was *not* present in LZMA SDK. commit 7d516f5129e4373a6d57249d7f608c634c66bf12 Author: Lasse Collin Date: Fri Mar 14 21:32:37 2008 +0200 Fix a comment in API header. commit 748d6e4274921a350bd0a317380309717441ef9c Author: Lasse Collin Date: Wed Mar 12 23:14:50 2008 +0200 Make lzma_stream.next_in const. Let's see if anyone complains. commit bfde3b24a5ae25ce53c854762b6148952386b025 Author: Lasse Collin Date: Tue Mar 11 15:35:34 2008 +0200 Apply a minor speed optimization to LZMA decoder. commit f310c50286d9e4e9c6170bb65348c9bb430a65b4 Author: Lasse Collin Date: Tue Mar 11 15:17:16 2008 +0200 Initialize the last byte of the dictionary to zero so that lz_get_byte(lz, 0) returns zero. This was broken by 1a3b21859818e4d8e89a1da99699233c1bfd197d. commit 5ead36cf7f823093672a4e43c3180b38c9abbaff Author: Lasse Collin Date: Mon Mar 10 15:57:55 2008 +0200 Really fix the price count initialization. commit d4d7feb83d1a1ded8f662a82e21e053841ca726c Author: Lasse Collin Date: Mon Mar 10 13:47:17 2008 +0200 Updated THANKS. commit 0541c5ea63ef3c0ff85eeddb0a420e56b0c65258 Author: Lasse Collin Date: Mon Mar 10 13:46:48 2008 +0200 Initialize align_price_count and match_price_count in lzma_encoder_init.c. While we don't call fill_distances_prices() and fill_align_prices() in lzma_lzma_encoder_init(), we still need to initialize these two variables so that the fill functions get called in lzma_encoder_getoptimum.c in the beginning of a stream. commit 596fa1fac72823e4ef5bc26bb53f9090445bf748 Author: Lasse Collin Date: Mon Mar 10 13:44:29 2008 +0200 Always initialize lz->temp_size in lz_decoder.c. temp_size did get initialized as a side-effect after allocating a new decoder, but not when the decoder was reused.
commit 45e43e169527e7a98a8c8a821d37bf25822b764d Author: Lasse Collin Date: Mon Mar 10 13:41:25 2008 +0200 Don't fill allocated memory with 0xFD when debugging is enabled. It hides errors from Valgrind. commit c0e19e0662205f81a86da8903cdc325d50635870 Author: Lasse Collin Date: Thu Feb 28 10:24:31 2008 +0200 Remove two redundant validity checks from the LZMA decoder. These are already checked elsewhere, so omitting these gives (very) tiny speed up. commit de7485806284d1614095ae8cb2ebbb5d74c9ac45 Author: Lasse Collin Date: Wed Feb 6 13:25:32 2008 +0200 Tiny clean up to file-format.txt. commit 1a3b21859818e4d8e89a1da99699233c1bfd197d Author: Lasse Collin Date: Sat Feb 2 14:51:06 2008 +0200 Don't memzero() the history buffer when initializing LZ decoder. There's no danger of information leak here, so it isn't required. Doing memzero() takes a lot of time with large dictionaries, which could make it easier to construct DoS attack to consume too much CPU time. commit 7e796e312bf644ea95aea0ff85480f47cfa30fc0 Author: Lasse Collin Date: Fri Feb 1 08:39:26 2008 +0200 Do uncompressed size validation in raw encoder. This way it gets done for not only raw encoder, but also Block and LZMA_Alone encoders. commit 7dd48578a3853e0cfab9f1830bc30927173ec4bc Author: Lasse Collin Date: Fri Feb 1 08:32:05 2008 +0200 Avoid unneeded function call in raw_common.c. commit b596fac963c3ff96f615d4d9b427a213ec341211 Author: Lasse Collin Date: Sat Jan 26 21:42:38 2008 +0200 Updated THANKS. commit e9f6e9c075ad93141a568d94f7d4eb0f2edbd6c2 Author: Lasse Collin Date: Sat Jan 26 21:40:23 2008 +0200 Added note.GNU-stack to x86 assembler files. It is needed when using non-executable stack. commit 4c7ad179c78f97f68ad548cb40a9dfa6871655ae Author: Lasse Collin Date: Sat Jan 26 19:12:50 2008 +0200 Added api/lzma/easy.h. I had forgot to add this to the git repo. Thanks to Stephan Kulow. 
commit 288b232f54c3692cd36f471d4042f51daf3ea79f Author: Lasse Collin Date: Sat Jan 26 11:09:17 2008 +0200 Added more test files. commit c467b0defccf233d0c79234407bc38d7d09574d3 Author: Lasse Collin Date: Sat Jan 26 10:47:55 2008 +0200 Added more test files. commit f9842f712732c482f2def9f24437851e57dd83f8 Author: Lasse Collin Date: Sat Jan 26 00:25:34 2008 +0200 Return LZMA_HEADER_ERROR if LZMA_SYNC_FLUSH is used with any of the so called simple filters. If there is demand, limited support for LZMA_SYNC_FLUSH may be added in future. After this commit, using LZMA_SYNC_FLUSH shouldn't cause undefined behavior in any situation. commit e988ea1d1a286dd0f27af0657f9665d5cd8573aa Author: Lasse Collin Date: Fri Jan 25 23:50:35 2008 +0200 Added more Multi-Block test files. Improved some descriptions in the test files' README. commit 4441e004185cd4c61bda184010eca5924c9dec87 Author: Lasse Collin Date: Fri Jan 25 23:12:36 2008 +0200 Combine lzma_options_block validation needed by both Block encoder and decoder, and put the shared things to block_private.h. Improved the checks a little so that they may detect too big Compressed Size at initialization time if lzma_options_block.total_size or .total_limit is known. Allow encoding and decoding Blocks with combinations of fields that are not allowed by the file format specification. Doing this requires that the application passes such a combination in lzma_options_lzma; liblzma doesn't do that, but it's not impossible that someone could find them useful in some custom file format. commit bf4200c818fcf9102e56328d39cde91bfa13cfb6 Author: Lasse Collin Date: Fri Jan 25 19:21:22 2008 +0200 Added test_memlimit.c. commit 7b8fc7e6b501a32a36636dac79ecb57099269005 Author: Lasse Collin Date: Fri Jan 25 19:20:28 2008 +0200 Improved the memory limiter: - Added lzma_memlimit_max() and lzma_memlimit_reached() API functions. - Added simple estimation of malloc()'s memory usage overhead. - Fixed integer overflow detection in lzma_memlimit_alloc().
- Made some white space cleanups and added more comments. The description of lzma_memlimit_max() in memlimit.h is bad and should be improved. commit e0c3d0043da2f670cfdb1abbb3223d5a594ad8db Author: Lasse Collin Date: Fri Jan 25 13:55:52 2008 +0200 Use more parenthesis in succeed() macro in tests/tests.h. commit 1fd76d488179580d37f31ee11948f4932aed31fd Author: Lasse Collin Date: Thu Jan 24 14:49:34 2008 +0200 Added more Multi-Block Stream test files. commit 6e27b1098a28f4ce09bfa6df68ad94182dfc2936 Author: Lasse Collin Date: Thu Jan 24 00:46:05 2008 +0200 Added bunch of test files containing Multi-Block Streams. commit db9df0a9609c01a00a227329fb96e983971040f5 Author: Lasse Collin Date: Wed Jan 23 23:43:00 2008 +0200 Fix decoding of empty Metadata Blocks, that don't have even the Metadata Flags field. Earlier the code allowed such files; now they are prohibited as the file format specification requires. commit 765f0b05f6e95ed9194fb90819cee189ebbac36b Author: Lasse Collin Date: Wed Jan 23 23:38:18 2008 +0200 Fix a bug related to 99e12af4e2b866c011fe0106cd1e0bfdcc8fe9c6. lzma_metadata.header_metadata_size was not properly set to zero if the Metadata had only the Metadata Flags field. commit 3a7cc5c3dec7b078941f961b0393b86c418883b6 Author: Lasse Collin Date: Wed Jan 23 23:35:49 2008 +0200 Fix decoding of Extra Records that have empty Data. commit e5fdec93e273855c1bcc2579b83cfb481a9a1492 Author: Lasse Collin Date: Wed Jan 23 22:02:38 2008 +0200 Add the trailing '\0' to lzma_extra.data as the API header already documents. commit ed40dc5a2c28a8dfccab8c165b3780738eeef93e Author: Lasse Collin Date: Wed Jan 23 21:21:21 2008 +0200 Added debug/full_flush.c. commit ae0cd09a666a1682da8fc09487322227679e218d Author: Lasse Collin Date: Wed Jan 23 21:05:33 2008 +0200 Return LZMA_STREAM_END instead of LZMA_OK if LZMA_SYNC_FLUSH or LZMA_FULL_FLUSH is used when there's no unfinished Block open. 
commit 0e80ded13dfceb98f9494cbb5381a95eb44d03db Author: Lasse Collin Date: Wed Jan 23 20:05:01 2008 +0200 Added bad-single-none-footer_filter_flags.lzma and bad-single-none-too_long_vli.lzma. commit 8c8eb14055d8dd536b1b1c58fb284d34bb8ed1dd Author: Lasse Collin Date: Wed Jan 23 13:42:35 2008 +0200 Fixed a typo. commit 980f65a9a10160c4d105767871e3002b9aaba3e0 Author: Lasse Collin Date: Wed Jan 23 13:40:45 2008 +0200 Fix a memory leak in the Subblock encoder. commit 99e12af4e2b866c011fe0106cd1e0bfdcc8fe9c6 Author: Lasse Collin Date: Wed Jan 23 13:36:07 2008 +0200 Fix Size of Header Metadata Block handling. Now lzma_metadata.header_metadata_size == LZMA_VLI_VALUE_UNKNOWN is not allowed at all. To indicate missing Header Metadata Block, header_metadata_size must be set to zero. This is what Metadata decoder does after this patch too. Note that other missing fields in lzma_metadata are still indicated with LZMA_VLI_VALUE_UNKNOWN. This isn't as illogical as it sounds at first, because missing Size of Header Metadata Block means that Header Metadata Block is not present in the Stream. With other Metadata fields, a missing field means only that the value is unknown. commit 58b78ab20c1bcced45cf71ae6684868fc90b4b81 Author: Lasse Collin Date: Wed Jan 23 13:15:55 2008 +0200 Fix a memory leak in metadata_decoder.c. commit 4d8cdbdab44400fd98f0f18a0f701e27cd1acdae Author: Lasse Collin Date: Wed Jan 23 13:13:58 2008 +0200 Fix the fix 863028cb7ad6d8d0455fa69348f56b376d7b908f which just moved the problem. Now it's really fixed. commit 67321de963ccf69410b3868b8e31534fe18a90de Author: Lasse Collin Date: Wed Jan 23 00:21:04 2008 +0200 Take advantage of return_if_error() macro in lzma_info_metadata_set() in info.c. commit 863028cb7ad6d8d0455fa69348f56b376d7b908f Author: Lasse Collin Date: Wed Jan 23 00:18:32 2008 +0200 Fixed a dangling pointer that caused invalid free().
commit cf49f42a6bd40143f54a6b10d6e605599e958c0b Author: Lasse Collin Date: Tue Jan 22 22:49:24 2008 +0200 Added lzma_easy_* functions. These should make using liblzma as easy as using zlib, because the easy API doesn't require developers to know any fancy LZMA options. Note that Multi-Block Stream encoding is currently broken. The easy API should be OK, the bug(s) are elsewhere. commit 1747b85a43abc1c3f152dbd349be2ef4089ecf6a Author: Lasse Collin Date: Tue Jan 22 21:16:22 2008 +0200 Fix Multi-Block Stream encoder's EOPM usage. commit 0ed6f1adcea540fb9593ca115d36de537f7f0dc6 Author: Lasse Collin Date: Tue Jan 22 00:15:11 2008 +0200 Made lzma_extra pointers const in lzma_options_stream. commit 305afa38f64c75af8e81c4167e2d8fa8d85b53a4 Author: Lasse Collin Date: Sun Jan 20 20:15:21 2008 +0200 Updated debug/sync_flush.c. commit d53e9b77054cfade6a643e77d085273a348b189c Author: Lasse Collin Date: Sun Jan 20 20:14:26 2008 +0200 Added debug/repeat.c. commit 107259e306bcfc2336a0fb870fb58034c28faa52 Author: Lasse Collin Date: Sun Jan 20 20:12:58 2008 +0200 Fix alignment handling bugs in Subblock encoder. This leaves one known alignment bug unfixed: If repeat count doesn't fit into 28-bit integer, the encoder has to split this to multiple Subblocks with Subblock Type `Repeating Data'. The extra Subblocks may have wrong alignment. Correct alignment is restored after the split Repeating Data has been completely written out. Since the encoder doesn't even try to fix the alignment unless the size of Data is at least 4 bytes, to trigger this bug you need at least 4 GiB of repeating data with sequence length of 4 or more bytes. Since the worst thing done by this bug is misaligned data (no data corruption), this bug simply isn't worth fixing, because a proper fix isn't simple. commit e141fe18950400faaa3503ff88ac20eacd73e88c Author: Lasse Collin Date: Sat Jan 19 21:16:33 2008 +0200 Implemented LZMA_SYNC_FLUSH support to the Subblock encoder.
The API for handling Subfilters was changed to make it consistent with LZMA_SYNC_FLUSH. A few sanity checks were added for Subfilter handling. Some small bugs were fixed. More comments were added. commit 23c227a864a3b69f38c6a74306161d4e6918d1cc Author: Lasse Collin Date: Sat Jan 19 15:19:21 2008 +0200 Revised the Delta filter implementation. The initialization function is still shared between encoder and decoder, but the actual coding is in separate files for encoder and decoder. There are now separate functions for the actual delta calculation depending on if Delta is the last filter in the chain or not. If it is the last, the new code copies the data from input to output buffer and does the delta calculation at the same time. The old code first copied the data, then did the delta in the target buffer, which required reading through the data twice. Support for LZMA_SYNC_FLUSH was added to the Delta encoder. This doesn't change anything in the file format. commit 61dc82f3e306b25ce3cd3d529df9ec7a0ec04b73 Author: Lasse Collin Date: Fri Jan 18 20:18:08 2008 +0200 Added the debug directory and the first debug tool (sync_flush). These tools are not built unless the user runs "make" in the debug directory. commit 0ae3208db94585eb8294b97ded387de0a3a07646 Author: Lasse Collin Date: Fri Jan 18 20:13:00 2008 +0200 Added test files to test usage of flush marker in LZMA. commit ab5feaf1fcc146ef9fd39360c53c290bec39524e Author: Lasse Collin Date: Fri Jan 18 20:02:52 2008 +0200 Fix LZMA_SYNC_FLUSH handling in LZ and LZMA encoders. That code is now almost completely in LZ coder, where it can be shared with other LZ77-based algorithms in future. commit 079c4f7fc26b3d0b33d9ae7536697b45f3b73585 Author: Lasse Collin Date: Fri Jan 18 17:21:24 2008 +0200 Don't add -g to CFLAGS when --enable-debug is specified. It's the job of the user to put that in CFLAGS.
commit 61d1784d8f1761d979a6da6e223e279ca33815e6 Author: Lasse Collin Date: Fri Jan 18 14:17:37 2008 +0200 Set stdin and stdout to binary mode on Windows. This patch is a forward port of b7b22fcb979a16d3a47c8001f058c9f7d4416068 from lzma-utils-legacy.git. I don't know if the new code base builds on Windows, but this is a start. commit c9cba976913e55ff9aac8a8133cc94416c7c1c9c Author: Lasse Collin Date: Fri Jan 18 00:50:29 2008 +0200 Added test_compress.sh and bunch of files needed by it. This new set of tests compress and decompress several test files with many different compression options. This set of tests will be extended later. commit 33be3c0e24d8f43376ccf71cc77d53671e792f07 Author: Lasse Collin Date: Thu Jan 17 18:56:53 2008 +0200 Subblock decoder: Don't exit the main loop in decode_buffer() too early if we hit End of Input while decoding a Subblock of type Repeating Data. To keep the loop termination condition elegant, the order of enumerations in coder->sequence were changed. To keep the case-labels in roughly the same order as the enumerations in coder->sequence, large chunks of code was moved around. This made the diff big and ugly compared to the amount of the actual changes made. commit b254bd97b1cdb68d127523d91ca9e054ed89c4fd Author: Lasse Collin Date: Thu Jan 17 17:39:42 2008 +0200 Fix wrong too small size of argument unfiltered_max in ia64_coder_init(). It triggered assert() in simple_coder.c, and could have caused a buffer overflow. This error was probably a copypaste mistake, since most of the simple filters use unfiltered_max = 4. commit 8f5794c8f1a30e8e3b524b415bbe81af2e04c64a Author: Lasse Collin Date: Thu Jan 17 17:27:45 2008 +0200 Added --delta to the output of "lzma --help". 
commit f88590e0014b38d40465937c19f25f05f16c79ae Author: Lasse Collin Date: Thu Jan 17 13:14:20 2008 +0200 Fix Subblock decoder: If Subblock filter was used with known Uncompressed Size, and the last output byte was from RLE, the code didn't stop decoding as it should have done. commit bc0b945ca376e333077644d2f7fd54c2848aab8a Author: Lasse Collin Date: Wed Jan 16 16:33:37 2008 +0200 Tiny non-technical edits to file-format.txt. commit 7599bb7064ccf007f054595dedda7927af868252 Author: Lasse Collin Date: Wed Jan 16 14:48:04 2008 +0200 Plugged a memory leak in stream_decoder.c. commit 0b581539311f3712946e81e747839f8fb5f441a7 Author: Lasse Collin Date: Wed Jan 16 14:47:27 2008 +0200 Added memory leak detection to lzmadec.c. commit 5b5b13c7bb8fde6331064d21f3ebde41072480c4 Author: Lasse Collin Date: Wed Jan 16 14:46:50 2008 +0200 Added lzma_memlimit_count(). commit 19389f2b82ec54fd4c847a18f16482e7be4c9887 Author: Lasse Collin Date: Wed Jan 16 14:31:44 2008 +0200 Added ARRAY_SIZE(array) macro. commit 9bc33a54cbf83952130adbcb1be32c6882485416 Author: Lasse Collin Date: Wed Jan 16 13:27:03 2008 +0200 Make Uncompressed Size validation more strict in alone_decoder.c. commit 01d71d60b79027e1ce3eb9c79ae5191e1407c883 Author: Lasse Collin Date: Tue Jan 15 17:46:59 2008 +0200 Free the allocated memory in lzmadec if debugging is enabled. This should make it possible to detect possible memory leaks with Valgrind. commit 8235e6e5b2878f76633afcda9a334640db503ef5 Author: Lasse Collin Date: Tue Jan 15 16:25:38 2008 +0200 Fix memory leaks from test_block_header.c. commit f10fc6a69d40b6d5c9cfbf8d3746f49869c2e2f6 Author: Lasse Collin Date: Tue Jan 15 14:23:35 2008 +0200 Use fastpos.h when encoding LZMA dictionary size in Filter Flags encoder. commit e5728142a2048979f5c0c2149ce71ae952a092e1 Author: Lasse Collin Date: Tue Jan 15 14:02:22 2008 +0200 Revised the fastpos code. It now uses the slightly faster table-based version from LZMA SDK 4.57. This should be fast on most systems.
A simpler and smaller alternative version is also provided. On some CPUs this can be even a little faster than the default table-based version (see comments in fastpos.h), but on most systems the table-based code is faster. commit 10437b5b567f6a025ff16c45a572e417a0a9cc26 Author: Lasse Collin Date: Tue Jan 15 13:32:13 2008 +0200 Added bsr.h. commit f3c88e8b8d8dd57f4bba5f0921eebf276437c244 Author: Lasse Collin Date: Tue Jan 15 13:29:14 2008 +0200 Fixed assembler detection in configure.ac, and added detection for x86_64. commit 54ec204f58287f50d3976288295da4188a19192b Author: Lasse Collin Date: Tue Jan 15 12:20:41 2008 +0200 Omit invalid space from printf() format string in price_table_gen.c. commit 01b4b19f49f00e17a0f9cb8754c672ac0847b6e1 Author: Lasse Collin Date: Tue Jan 15 09:54:34 2008 +0200 Removed a few unused macros from lzma_common.h. commit 19bd7f3cf25e4ff8487ef7098ca4a7b58681961d Author: Lasse Collin Date: Tue Jan 15 08:37:42 2008 +0200 Fix a typo in lzma_encoder.c. commit 9f9b1983013048f2142e8bc7e240149d2687bedc Author: Lasse Collin Date: Tue Jan 15 08:36:25 2008 +0200 Convert bittree_get_price() and bittree_reverse_get_price() from macros to inline functions. commit 78e85cb1a7667c54853670d2eb09d754bcbda87d Author: Lasse Collin Date: Tue Jan 15 07:44:59 2008 +0200 Fix CRC code in case --enable-small is used. commit 949d4346e2d75bcd9dcb66c394d8d851d8db3aa0 Author: Lasse Collin Date: Tue Jan 15 07:41:39 2008 +0200 Fix typo in test_index.c. commit d13d693155c176fc9e9ad5c50d48ccba27c2d9c6 Author: Lasse Collin Date: Tue Jan 15 07:40:21 2008 +0200 Added precomputed range coder probability price table. commit 362dc3843b373c1007a50a4719f378981f18ae03 Author: Lasse Collin Date: Mon Jan 14 13:42:43 2008 +0200 Remove RC_BUFFER_SIZE from lzma_encoder_private.h and replace it with a sanity check. commit e22b37968d153683fec61ad37b6b160cb7ca4ddc Author: Lasse Collin Date: Mon Jan 14 13:39:54 2008 +0200 Major changes to LZ encoder, LZMA encoder, and range encoder. 
These changes implement support for LZMA_SYNC_FLUSH in LZMA encoder, and move the temporary buffer needed by range encoder from lzma_range_encoder structure to lzma_lz_encoder. commit b59ef3973781f892c0a72b5e5934194567100be5 Author: Lasse Collin Date: Mon Jan 14 13:34:29 2008 +0200 Added one assert() to process.c of the command line tool. commit 9547e734a00ddb64c851fa3f116e4f9e7d763ea7 Author: Lasse Collin Date: Mon Jan 14 12:09:52 2008 +0200 Don't use coder->lz.stream_end_was_reached in assertions in match_c.h. commit 3e09e1c05871f3757f759b801890ccccc9286608 Author: Lasse Collin Date: Mon Jan 14 12:08:02 2008 +0200 In lzma_read_match_distances(), don't use coder->lz.stream_end_was_reached. That variable will be removed, and the check isn't required anyway. Rearrange the check so that it doesn't make one to think that there could be an integer overflow. commit a670fec8021e5962429689c194148a04c3418872 Author: Lasse Collin Date: Mon Jan 14 11:56:41 2008 +0200 Small LZMA_SYNC_FLUSH fixes to Block and Single-Stream encoders. commit 3599dba9570a6972a16b6398d6c838e9b420e985 Author: Lasse Collin Date: Mon Jan 14 11:54:56 2008 +0200 More fixes to LZMA decoder's flush marker handling. commit f73c2ab6079ed5675a42b39d584a567befbd4624 Author: Lasse Collin Date: Thu Jan 10 17:13:42 2008 +0200 Eliminate lzma_lz_encoder.must_move_pos. It's needed only in one place which isn't performance critical. commit 382808514a42b2f4b4a64515e2dfb3fc1bc48ecd Author: Lasse Collin Date: Wed Jan 9 20:05:57 2008 +0200 Define HAVE_ASM_X86 when x86 assembler optimizations are used. This #define will be useful for inline assembly. commit 0e70fbe4032351aab13a1cd8e5deced105c0b276 Author: Lasse Collin Date: Wed Jan 9 12:06:46 2008 +0200 Added good-single-none-empty_3.lzma and bad-single-none-empty.lzma. commit 379fbbe84d922c7cc00afa65c6f0c095da596b19 Author: Lasse Collin Date: Tue Jan 8 23:11:59 2008 +0200 Take advantage of return_if_error() in block_decoder.c.
commit 97d5fa82077e57815dfad995dc393c2809a78539 Author: Lasse Collin Date: Tue Jan 8 23:10:57 2008 +0200 Updated tests/files/README. commit 3bb9bb310936cba6a743b4f06739a397dec7c28f Author: Lasse Collin Date: Tue Jan 8 23:05:40 2008 +0200 Added test files with empty Compressed Data. commit 7054c5f5888ac6a7178cd43dc9583ce6c7e78c9f Author: Lasse Collin Date: Tue Jan 8 22:58:42 2008 +0200 Fix decoding of Blocks that have only Block Header. commit 753e4d95cd1cf29c632dfe1a670af7c67aeffbf4 Author: Lasse Collin Date: Tue Jan 8 22:27:46 2008 +0200 Added good-single-subblock_implicit.lzma. commit faeac7b7aca75f86afed1e7cc06279d9d497c627 Author: Lasse Collin Date: Tue Jan 8 18:50:30 2008 +0200 Disable CRC32 from Block Headers when --check=none has been specified. commit a751126dbb656767ed4666cf0e5d3e17349d93d1 Author: Lasse Collin Date: Tue Jan 8 13:36:29 2008 +0200 Fixed encoding of empty files. Arguments to is_size_valid() were in wrong order in block_encoder.c. commit 9080267603b1006c4867c823307dca9df8be0d20 Author: Lasse Collin Date: Tue Jan 8 13:35:36 2008 +0200 Added a few test files. commit b4943ccf73b64fc93a90a23474509c316f55eb2b Author: Lasse Collin Date: Tue Jan 8 12:29:58 2008 +0200 Avoid using ! in test_files.sh, because that doesn't work with some ancient /bin/sh versions. commit e2417b2b9134f3f65e14b61e23cd3644d8954353 Author: Lasse Collin Date: Tue Jan 8 00:48:30 2008 +0200 More pre-C99 inttypes.h compatibility fixes. Now the code should work even if the system has no inttypes.h. commit 5d227e51c23639423f4ade06aabb54e131f8505e Author: Lasse Collin Date: Mon Jan 7 23:25:32 2008 +0200 Updated fi.po although it's currently pretty much crap. commit c7189d981a1b27c63da0c1ee80d9b5cd8ce1733d Author: Lasse Collin Date: Mon Jan 7 23:14:25 2008 +0200 Test for $GCC = yes instead of if it is non-empty. This way it is possible to use ac_cv_c_compiler_gnu=no to force configure to think it is using non-GNU C compiler. 
commit 3dbbea82b74bb841c995ad332a3aeca613015e10 Author: Lasse Collin Date: Mon Jan 7 21:49:41 2008 +0200 Added test_files.sh to tests/Makefile.am so it gets included in the tarball with "make dist". commit 2fd2d181543feab1b4003f3ac6e85625fbee04f0 Author: Lasse Collin Date: Mon Jan 7 18:22:24 2008 +0200 Cosmetic edit to test_files.sh. commit 9a71d573100a990ceb30ce0bec6a9a15d795605f Author: Lasse Collin Date: Mon Jan 7 18:09:44 2008 +0200 Added tests/files/README. commit 47f48fe9936ed72617a60fbd015df7e0e47a1e43 Author: Lasse Collin Date: Mon Jan 7 14:20:57 2008 +0200 Tell in COPYING that everything in tests/files is public domain. commit 3502b3e1d00251d3c8dda96079440705c28d8225 Author: Lasse Collin Date: Mon Jan 7 14:19:05 2008 +0200 Cleaned up the tests/files directory. commit 908b2ac604b9940369d7fe8a45e9eb6da5d2a24c Author: Lasse Collin Date: Mon Jan 7 13:49:19 2008 +0200 Added test_files.sh to test decoding of the files in the tests/files directory. It doesn't test the malicious files yet. commit ecb2a6548f5978022a8fa931719dc575f5fd3bf6 Author: Lasse Collin Date: Mon Jan 7 11:23:13 2008 +0200 Updated README regarding the assembler optimizations. commit eacb8050438d3e6146c86eb9732d3fb1ef1825cb Author: Lasse Collin Date: Mon Jan 7 10:58:00 2008 +0200 Updated THANKS. commit 1239649f96132b18e3b7e2dd152ecf53a195caa8 Author: Lasse Collin Date: Sun Jan 6 21:47:17 2008 +0200 Cosmetic changes to configure.ac. commit 88ee301ec2e4506a30ec7ac9aaa2288e2dcadd0e Author: Lasse Collin Date: Sun Jan 6 19:46:38 2008 +0200 Automatically disable assembler code on Darwin x86. Darwin has different ABI than GNU+Linux and Solaris, thus the assembler code doesn't assemble on Darwin. commit c15a7abf66e3a70792f7444115e484c7981c8284 Author: Lasse Collin Date: Sun Jan 6 19:45:27 2008 +0200 With printf(), use PRIu64 with a cast to uint64_t instead of %zu, because some pre-C99 libc versions don't support %zu. 
commit 4e7e54c4c522ab2f6a7abb92cefc4f707e9568fb Author: Lasse Collin Date: Sun Jan 6 16:27:41 2008 +0200 Introduced compatibility with systems that have pre-C99 or no inttypes.h. This is useful when the compiler has good enough support for C99, but libc headers don't. Changed liblzma API so that sys/types.h and inttypes.h have to be #included before #including lzma.h. On systems that don't have C99 inttypes.h, it's the problem of the applications to provide the required types and macros before #including lzma.h. If lzma.h defined the missing types and macros, it could conflict with third-party applications whose configure has detected that the types are missing and defined them in config.h already. An alternative would have been introducing lzma_uint32 and similar types, but that would just be an extra pain on modern systems. commit a71864f77dfb76b5d78a270641539947c312583a Author: Lasse Collin Date: Sat Jan 5 19:57:00 2008 +0200 Fix typo in comment (INT64_MAX -> UINT64_MAX). commit 072927905a3b66281c6311b4b351caa501d8b73a Author: Lasse Collin Date: Sat Jan 5 19:42:04 2008 +0200 Rearranged testing of GCC-specific flags. commit d160ee32598c6d1cd9054ef019e8c9331208b188 Author: Lasse Collin Date: Sat Jan 5 01:20:24 2008 +0200 Another bug fix for flush marker detection. commit fc67f79f607cbfa78c6f47a69dec098d8659b162 Author: Lasse Collin Date: Fri Jan 4 21:37:01 2008 +0200 Fix stupid bugs in flush marker detection. commit 0029cbbabe87d491fc046a55a629a6d556010baa Author: Lasse Collin Date: Fri Jan 4 21:30:33 2008 +0200 Added support for flush marker, which will be in files that use LZMA_SYNC_FLUSH with encoder (not implemented yet). This is a new feature in the raw LZMA format, which isn't supported by old decoders. This shouldn't be a problem in practice, since lzma_alone_encoder() will not allow LZMA_SYNC_FLUSH, and thus not allow creating files not decodable with old decoders.
Made lzma_decoder.c to require tab width of 4 characters if one wants to fit the code in 80 columns. This makes the code easier to read. commit bbfd1f6ab058a7e661545205befcb7f70c5685ab Author: Lasse Collin Date: Fri Jan 4 20:45:05 2008 +0200 Moved range decoder initialization (reading the first five input bytes) from LZMA decoder to range decoder header. Did the same for decoding of direct bits. commit 5db745cd2a74f6ed2e52f5c716c08ed0daf17ebc Author: Lasse Collin Date: Fri Dec 14 11:15:21 2007 +0200 Added a note to README that --disable-assembler must be used on Darwin. commit 44b333d4615b5aabc557a0e1b6bb0096da3fae24 Author: Lasse Collin Date: Fri Dec 14 10:07:10 2007 +0200 Use the filename suffix .S instead of .s for assembler files so that the preprocessor removes the /* */ style comments, which are not supported by some non-GNU assemblers (Solaris) that otherwise work with this code. commit ec1c82b2e82f395f6e8e19ac212a639644330cd7 Author: Lasse Collin Date: Fri Dec 14 09:59:05 2007 +0200 Fixed wrong symbol name in crc64_x86.s. commit 2881570df6803eed2fe550af34574e8e61794804 Author: Lasse Collin Date: Fri Dec 14 09:53:24 2007 +0200 Use .globl instead of .global in x86 assembler code for better portability. Still needs fixing the commenting. commit 698470b8f33fc0e5f27dafa93b39b6dd5dde5a66 Author: Lasse Collin Date: Thu Dec 13 20:14:37 2007 +0200 Fixed a few short options that take an argument. short_opts[] was missing colons to indicate required argument. Thanks to Fabio Pedretti for the bug report. commit 918bcb0e0728d2d976621e9f35b56f224f11d989 Author: Lasse Collin Date: Tue Dec 11 17:08:04 2007 +0200 Removed uncompressed size tracking from Delta encoder too. commit 3e16d51dd645667b05ff826665b1fc353aa41cd9 Author: Lasse Collin Date: Tue Dec 11 16:49:19 2007 +0200 Remove uncompressed size tracking from the filter encoders. It's not strictly needed there, and just complicates the code. LZ encoder never even had this feature. 
The primary reason to have uncompressed size tracking in filter encoders was validating that the application doesn't give different amount of input than it had promised. A side effect was to validate internal workings of liblzma. Uncompressed size tracking is still present in the Block encoder. Maybe it should be added to LZMA_Alone and raw encoders too. It's simpler to have one coder just to validate the uncompressed size instead of having it in every filter. commit 5286723e0d1ac386d5b07f08d78e61becf895a5a Author: Lasse Collin Date: Tue Dec 11 14:10:53 2007 +0200 Get rid of no-NLS gnulib. I don't know how to get it working with Automake. People who want smaller lzmadec should use --disable-nls on non-GNU systems. commit ce8b036a6c7a43b290356b673d953f6d76b2be64 Author: Lasse Collin Date: Tue Dec 11 14:09:35 2007 +0200 Fixed a typo in tests/Makefile.am which prevented building the tests if gnulib was needed. commit 7c1ad41eb611ed89e5bb8792a3beb533b7aa59f4 Author: Lasse Collin Date: Tue Dec 11 11:18:58 2007 +0200 Fixed wrong type of flags_size in Subblock encoder. commit ce64df716243fdc40359090d1f6541f3a4f5f21a Author: Lasse Collin Date: Mon Dec 10 20:44:16 2007 +0200 Bumped version number to 4.42.3alpha. commit b499a0403ea5c41d6a25b40275eb6c57643052ce Author: Lasse Collin Date: Mon Dec 10 15:02:50 2007 +0200 Disabled some unneeded warnings and made "make dist" work. commit 2ab8adb5165a0b77114a7eb21f9ff1e6a266f172 Author: Lasse Collin Date: Sun Dec 9 21:43:15 2007 +0200 Added LZMA_SYNC_FLUSH support to the Copy filter. commit 329c272d501e88793dda5540358d55c12428d194 Author: Lasse Collin Date: Sun Dec 9 17:14:07 2007 +0200 Added missing LZMA_API to the C versions of the CRC functions. The x86 assembler versions were already OK. commit c90daf86ce683fa8cf80491d624ffb158dfbd9d7 Author: Jim Meyering Date: Sun Dec 9 15:34:25 2007 +0100 * tests/test_block_header.c (test3): Remove duplicate initializer.
commit 07ac881779a8477f2c1ab112b91a129e24aa743c Author: Lasse Collin Date: Sun Dec 9 17:06:45 2007 +0200 Take advantage of return_if_error() macro in more places. Cleaned Subblock filter's initialization code too. commit 41338717964f510ee61d70b25bd4c502ec9f77cf Author: Lasse Collin Date: Sun Dec 9 12:13:01 2007 +0200 Added a bunch of .lzma test files. commit ff946ceb7975d4f11950afd33f6315b4d20d1a03 Author: Lasse Collin Date: Sun Dec 9 11:24:48 2007 +0200 Re-enabled the security checks in Subblock decoder that were disabled for debugging reasons. commit 2bf36d22d2c24ac3f488e63b35564fa2f6dab8d1 Author: Lasse Collin Date: Sun Dec 9 11:03:28 2007 +0200 Fixed the tests to build with -Werror. commit 5d018dc03549c1ee4958364712fb0c94e1bf2741 Author: Lasse Collin Date: Sun Dec 9 00:42:33 2007 +0200 Imported to git. diff --git a/README b/README index 1b02f05e3de8..ab8aadfb5338 100644 --- a/README +++ b/README @@ -1,308 +1,308 @@ XZ Utils ======== 0. Overview 1. Documentation 1.1. Overall documentation 1.2. Documentation for command-line tools 1.3. Documentation for liblzma 2. Version numbering 3. Reporting bugs 4. Translating the xz tool 5. Other implementations of the .xz format 6. Contact information 0. Overview ----------- XZ Utils provide a general-purpose data-compression library plus command-line tools. The native file format is the .xz format, but also the legacy .lzma format is supported. The .xz format supports multiple compression algorithms, which are called "filters" in the context of XZ Utils. The primary filter is currently LZMA2. With typical files, XZ Utils create about 30 % smaller files than gzip. To ease adapting support for the .xz format into existing applications and scripts, the API of liblzma is somewhat similar to the API of the popular zlib library. For the same reason, the command-line tool xz has a command-line syntax similar to that of gzip. 
When aiming for the highest compression ratio, the LZMA2 encoder uses a lot of CPU time and may use, depending on the settings, even hundreds of megabytes of RAM. However, in fast modes, the LZMA2 encoder competes with bzip2 in compression speed, RAM usage, and compression ratio. LZMA2 is reasonably fast to decompress. It is a little slower than gzip, but a lot faster than bzip2. Being fast to decompress means that the .xz format is especially nice when the same file will be decompressed very many times (usually on different computers), which is the case e.g. when distributing software packages. In such situations, it's not too bad if the compression takes some time, since that needs to be done only once to benefit many people. With some file types, combining (or "chaining") LZMA2 with an additional filter can improve the compression ratio. A filter chain may contain up to four filters, although usually only one or two are used. For example, putting a BCJ (Branch/Call/Jump) filter before LZMA2 in the filter chain can improve compression ratio of executable files. Since the .xz format allows adding new filter IDs, it is possible that some day there will be a filter that is, for example, much faster to compress than LZMA2 (but probably with worse compression ratio). Similarly, it is possible that some day there is a filter that will compress better than LZMA2. XZ Utils doesn't support multithreaded compression or decompression yet. It has been planned though and taken into account when designing the .xz file format. 1. Documentation ---------------- 1.1. 
Overall documentation README This file INSTALL.generic Generic install instructions for those not familiar with packages using GNU Autotools INSTALL Installation instructions specific to XZ Utils PACKAGERS Information to packagers of XZ Utils COPYING XZ Utils copyright and license information COPYING.GPLv2 GNU General Public License version 2 COPYING.GPLv3 GNU General Public License version 3 COPYING.LGPLv2.1 GNU Lesser General Public License version 2.1 AUTHORS The main authors of XZ Utils THANKS Incomplete list of people who have helped making this software NEWS User-visible changes between XZ Utils releases ChangeLog Detailed list of changes (commit log) TODO Known bugs and some sort of to-do list Note that only some of the above files are included in binary packages. 1.2. Documentation for command-line tools The command-line tools are documented as man pages. In source code releases (and possibly also in some binary packages), the man pages are also provided in plain text (ASCII only) and PDF formats in the directory "doc/man" to make the man pages more accessible to those whose operating system doesn't provide an easy way to view man pages. 1.3. Documentation for liblzma The liblzma API headers include short docs about each function and data type as Doxygen tags. These docs should be quite OK as a quick reference. I have planned to write a bunch of very well documented example programs, which (due to comments) should work as a tutorial to various features of liblzma. No such example programs have been written yet. For now, if you have never used liblzma, libbzip2, or zlib, I recommend learning the *basics* of the zlib API. Once you know that, it should be easier to learn liblzma. http://zlib.net/manual.html http://zlib.net/zlib_how.html 2. Version numbering -------------------- The version number format of XZ Utils is X.Y.ZS: - X is the major version. When this is incremented, the library API and ABI break. - Y is the minor version. 
It is incremented when new features are added without breaking the existing API or ABI. An even Y indicates a stable release and an odd Y indicates unstable (alpha or beta version). - Z is the revision. This has a different meaning for stable and unstable releases: * Stable: Z is incremented when bugs get fixed without adding any new features. This is intended to be convenient for downstream distributors that want bug fixes but don't want any new features to minimize the risk of introducing new bugs. * Unstable: Z is just a counter. API or ABI of features added in earlier unstable releases having the same X.Y may break. - S indicates stability of the release. It is missing from the stable releases, where Y is an even number. When Y is odd, S is either "alpha" or "beta" to make it very clear that such versions are not stable releases. The same X.Y.Z combination is not used for more than one stability level, i.e. after X.Y.Zalpha, the next version can be X.Y.(Z+1)beta but not X.Y.Zbeta. 3. Reporting bugs ----------------- Naturally it is easiest for me if you already know what causes the unexpected behavior. Even better if you have a patch to propose. However, quite often the reason for unexpected behavior is unknown, so here are a few things to do before sending a bug report: 1. Try to create a small example of how to reproduce the issue. 2. Compile XZ Utils with debugging code using configure switches --enable-debug and, if possible, --disable-shared. If you are using GCC, use CFLAGS='-O0 -ggdb3'. Don't strip the resulting binaries. 3. Turn on core dumps. The exact command depends on your shell; for example in GNU bash it is done with "ulimit -c unlimited", and in tcsh with "limit coredumpsize unlimited". 4. Try to reproduce the suspected bug. If you get an "assertion failed" message, be sure to include the complete message in your bug report. If the application leaves a coredump, get a backtrace using gdb: $ gdb /path/to/app-binary # Load the app to the debugger.
(gdb) core core # Open the coredump. (gdb) bt # Print the backtrace. Copy & paste to bug report. (gdb) quit # Quit gdb. Report your bug via email or IRC (see Contact information below). Don't send core dump files or any executables. If you have a small example file(s) (total size less than 256 KiB), please include it/them as an attachment. If you have bigger test files, put them online somewhere and include a URL to the file(s) in the bug report. Always include the exact version number of XZ Utils in the bug report. If you are using a snapshot from the git repository, use "git describe" to get the exact snapshot version. If you are using XZ Utils shipped in an operating system distribution, mention the distribution name, distribution version, and exact xz package version; if you cannot repeat the bug with the code compiled from unpatched source code, you probably need to report a bug to your distribution's bug tracking system. 4. Translating the xz tool -------------------------- The messages from the xz tool have been translated into a few languages. Before starting to translate into a new language, ask the author whether someone else hasn't already started working on it. Test your translation. Testing includes comparing the translated output to the original English version by running the same commands in both your target locale and with LC_ALL=C. Ask someone to proof-read and test the translation. Testing can be done e.g. by installing xz into a temporary directory: ./configure --disable-shared --prefix=/tmp/xz-test # make -C po update-po make install - bash debug/translations.bash | less - bash debug/translations.bash | less -S # For --list outputs + bash debug/translation.bash | less + bash debug/translation.bash | less -S # For --list outputs Repeat the above as needed (no need to re-run configure though). Note especially the following: - The output of --help and --long-help must look nice on an 80-column terminal. It's OK to add extra lines if needed. 
- In contrast, don't add extra lines to error messages and such. They are often preceded with e.g. a filename on the same line, so you have no way to predict where to put a \n. Let the terminal do the wrapping even if it looks ugly. Adding new lines will be even uglier in the generic case even if it looks nice in a few limited examples. - Be careful with column alignment in tables and table-like output (--list, --list --verbose --verbose, --info-memory, --help, and --long-help): * All descriptions of options in --help should start in the same column (but it doesn't need to be the same column as in the English messages; just be consistent if you change it). Check that both --help and --long-help look OK, since they share several strings. * --list --verbose and --info-memory print lines that have the format "Description: %s". If you need a longer description, you can put extra space between the colon and %s. Then you may need to add extra space to other strings too so that the result as a whole looks good (all values start at the same column). * The columns of the actual tables in --list --verbose --verbose should be aligned properly. Abbreviate if necessary. It might be good to keep at least 2 or 3 spaces between column headings and avoid spaces in the headings so that the columns stand out better, but this is a matter of opinion. Do what you think looks best. - Be careful to put a period at the end of a sentence when the original version has it, and don't put it when the original doesn't have it. Similarly, be careful with \n characters at the beginning and end of the strings. - Read the TRANSLATORS comments that have been extracted from the source code and included in xz.pot. If they suggest testing the translation with some type of command, do it. If testing needs input files, use e.g. tests/files/good-*.xz. - When updating the translation, read the fuzzy (modified) strings carefully, and don't mark them as updated before you actually have updated them. 
Reading through the unchanged messages can be good too; sometimes you may find a better wording for them. - If you find language problems in the original English strings, feel free to suggest improvements. Ask if something is unclear. - The translated messages should be understandable (sometimes this may be a problem with the original English messages too). Don't make a direct word-by-word translation from English especially if the result doesn't sound good in your language. In short, take your time and pay attention to the details. Making a good translation is not a quick and trivial thing to do. The translated xz should look as polished as the English version. 5. Other implementations of the .xz format ------------------------------------------ 7-Zip and the p7zip port of 7-Zip support the .xz format starting from the version 9.00alpha. http://7-zip.org/ http://p7zip.sourceforge.net/ XZ Embedded is a limited implementation written for use in the Linux kernel, but it is also suitable for other embedded use. http://tukaani.org/xz/embedded.html 6. Contact information ---------------------- If you have questions, bug reports, patches etc. related to XZ Utils, contact Lasse Collin (in Finnish or English). I'm sometimes slow at replying. If you haven't got a reply within two weeks, assume that your email has got lost and resend it or use IRC. You can find me also from #tukaani on Freenode; my nick is Larhzu. The channel tends to be pretty quiet, so just ask your question and someone may wake up. diff --git a/THANKS b/THANKS index b13503e2d360..36b2e74c9fec 100644 --- a/THANKS +++ b/THANKS @@ -1,91 +1,95 @@ Thanks ====== Some people have helped more, some less, but nevertheless everyone's help has been important. :-) In alphabetical order: - Mark Adler - H. Peter Anvin + - Jeff Bastian - Nelson H. F. Beebe - Karl Berry - Anders F. Björklund - Emmanuel Blot - Martin Blumenstingl - Jakub Bogusz - Maarten Bosmans - Trent W. 
Buck - James Buren - David Burklund - Daniel Mealha Cabrita - Milo Casagrande - Marek Černocký - Chris Donawa - Andrew Dudman - Markus Duft - İsmail Dönmez - Robert Elz - Gilles Espinasse - Denis Excoffier - Michael Felt - Mike Frysinger - Jason Gorski - Juan Manuel Guerrero - Joachim Henke - Peter Ivanov - Jouk Jansen - Per Øyvind Karlsen - Thomas Klausner - Richard Koch - Ville Koskinen - Stephan Kulow - Peter Lawler - Hin-Tak Leung - Andraž 'ruskie' Levstik - Cary Lewis - Wim Lewis - Lorenzo De Liso - Bela Lubkin - Gregory Margo - Jim Meyering + - Conley Moorhous - Rafał Mużyło - Adrien Nader - Hongbo Ni - Jonathan Nieder - Andre Noll - Peter O'Gorman - Peter Pallinger - Igor Pavlov - Diego Elio Pettenò - Elbert Pol - Mikko Pouru + - Pavel Raiskup - Robert Readman - Bernhard Reutner-Fischer + - Eric S. Raymond - Cristian Rodríguez - Christian von Roques - Jukka Salmi - Alexandre Sauvé - Benno Schulenberg - Andreas Schwab - Dan Shechter - Stuart Shelton - Jonathan Stott - Dan Stromberg - Paul Townsend - Mohammed Adnène Trojette - Alexey Tourbin - Patrick J. Volkerding - Martin Väth - Christian Weisgerber - Bert Wesarg - Ralf Wildenhues - Charles Wilson - Lars Wirzenius - Pilorz Wojciech - Ryan Young - Andreas Zieringer Also thanks to all the people who have participated in the Tukaani project. I have probably forgotten to add some names to the above list. Sorry about that and thanks for your help. diff --git a/src/liblzma/api/lzma/version.h b/src/liblzma/api/lzma/version.h index 629e8e1a5be8..66e93965dcab 100644 --- a/src/liblzma/api/lzma/version.h +++ b/src/liblzma/api/lzma/version.h @@ -1,121 +1,121 @@ /** * \file lzma/version.h * \brief Version number */ /* * Author: Lasse Collin * * This file has been put into the public domain. * You can do whatever you want with this file. * * See ../lzma.h for information about liblzma as a whole. */ #ifndef LZMA_H_INTERNAL # error Never include this file directly. Use <lzma.h> instead.
#endif /* * Version number split into components */ #define LZMA_VERSION_MAJOR 5 #define LZMA_VERSION_MINOR 0 -#define LZMA_VERSION_PATCH 4 +#define LZMA_VERSION_PATCH 5 #define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_STABLE #ifndef LZMA_VERSION_COMMIT # define LZMA_VERSION_COMMIT "" #endif /* * Map symbolic stability levels to integers. */ #define LZMA_VERSION_STABILITY_ALPHA 0 #define LZMA_VERSION_STABILITY_BETA 1 #define LZMA_VERSION_STABILITY_STABLE 2 /** * \brief Compile-time version number * * The version number is of format xyyyzzzs where * - x = major * - yyy = minor * - zzz = revision * - s indicates stability: 0 = alpha, 1 = beta, 2 = stable * * The same xyyyzzz triplet is never reused with different stability levels. * For example, if 5.1.0alpha has been released, there will never be 5.1.0beta * or 5.1.0 stable. * * \note The version number of liblzma has nothing to do with * the version number of Igor Pavlov's LZMA SDK. */ #define LZMA_VERSION (LZMA_VERSION_MAJOR * UINT32_C(10000000) \ + LZMA_VERSION_MINOR * UINT32_C(10000) \ + LZMA_VERSION_PATCH * UINT32_C(10) \ + LZMA_VERSION_STABILITY) /* * Macros to construct the compile-time version string */ #if LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_ALPHA # define LZMA_VERSION_STABILITY_STRING "alpha" #elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_BETA # define LZMA_VERSION_STABILITY_STRING "beta" #elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_STABLE # define LZMA_VERSION_STABILITY_STRING "" #else # error Incorrect LZMA_VERSION_STABILITY #endif #define LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) \ #major "." #minor "." #patch stability commit #define LZMA_VERSION_STRING_C(major, minor, patch, stability, commit) \ LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) /** * \brief Compile-time version as a string * * This can be for example "4.999.5alpha", "4.999.8beta", or "5.0.0" (stable * versions don't have any "stable" suffix).
In future, a snapshot built * from source code repository may include an additional suffix, for example * "4.999.8beta-21-g1d92". The commit ID won't be available in numeric form * in LZMA_VERSION macro. */ #define LZMA_VERSION_STRING LZMA_VERSION_STRING_C( \ LZMA_VERSION_MAJOR, LZMA_VERSION_MINOR, \ LZMA_VERSION_PATCH, LZMA_VERSION_STABILITY_STRING, \ LZMA_VERSION_COMMIT) /* #ifndef is needed for use with windres (MinGW or Cygwin). */ #ifndef LZMA_H_INTERNAL_RC /** * \brief Run-time version number as an integer * * Return the value of LZMA_VERSION macro at the compile time of liblzma. * This allows the application to compare if it was built against the same, * older, or newer version of liblzma that is currently running. */ extern LZMA_API(uint32_t) lzma_version_number(void) lzma_nothrow lzma_attr_const; /** * \brief Run-time version as a string * * This function may be useful if you want to display which version of * liblzma your application is currently using. */ extern LZMA_API(const char *) lzma_version_string(void) lzma_nothrow lzma_attr_const; #endif diff --git a/src/liblzma/check/sha256.c b/src/liblzma/check/sha256.c index 04231dbaef99..23bda9222dda 100644 --- a/src/liblzma/check/sha256.c +++ b/src/liblzma/check/sha256.c @@ -1,201 +1,200 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file sha256.c /// \brief SHA-256 /// /// \todo Crypto++ has x86 ASM optimizations. They use SSE so if they /// are imported to liblzma, SSE instructions need to be used /// conditionally to keep the code working on older boxes. -/// We could also support using some external libary for SHA-256. // // This code is based on the code found from 7-Zip, which has a modified // version of the SHA-256 found from Crypto++ . // The code was modified a little to fit into liblzma. // // Authors: Kevin Springle // Wei Dai // Igor Pavlov // Lasse Collin // // This file has been put into the public domain. 
// You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// // Avoid bogus warnings in transform(). #if (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __GNUC__ > 4 # pragma GCC diagnostic ignored "-Wuninitialized" #endif #include "check.h" // At least on x86, GCC is able to optimize this to a rotate instruction. #define rotr_32(num, amount) ((num) >> (amount) | (num) << (32 - (amount))) #define blk0(i) (W[i] = data[i]) #define blk2(i) (W[i & 15] += s1(W[(i - 2) & 15]) + W[(i - 7) & 15] \ + s0(W[(i - 15) & 15])) #define Ch(x, y, z) (z ^ (x & (y ^ z))) #define Maj(x, y, z) ((x & y) | (z & (x | y))) #define a(i) T[(0 - i) & 7] #define b(i) T[(1 - i) & 7] #define c(i) T[(2 - i) & 7] #define d(i) T[(3 - i) & 7] #define e(i) T[(4 - i) & 7] #define f(i) T[(5 - i) & 7] #define g(i) T[(6 - i) & 7] #define h(i) T[(7 - i) & 7] #define R(i) \ h(i) += S1(e(i)) + Ch(e(i), f(i), g(i)) + SHA256_K[i + j] \ + (j ? blk2(i) : blk0(i)); \ d(i) += h(i); \ h(i) += S0(a(i)) + Maj(a(i), b(i), c(i)) #define S0(x) (rotr_32(x, 2) ^ rotr_32(x, 13) ^ rotr_32(x, 22)) #define S1(x) (rotr_32(x, 6) ^ rotr_32(x, 11) ^ rotr_32(x, 25)) #define s0(x) (rotr_32(x, 7) ^ rotr_32(x, 18) ^ (x >> 3)) #define s1(x) (rotr_32(x, 17) ^ rotr_32(x, 19) ^ (x >> 10)) static const uint32_t SHA256_K[64] = { 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5, 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 
0x682E6FF3, 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2, }; static void transform(uint32_t state[static 8], const uint32_t data[static 16]) { uint32_t W[16]; uint32_t T[8]; // Copy state[] to working vars. memcpy(T, state, sizeof(T)); // 64 operations, partially loop unrolled for (unsigned int j = 0; j < 64; j += 16) { R( 0); R( 1); R( 2); R( 3); R( 4); R( 5); R( 6); R( 7); R( 8); R( 9); R(10); R(11); R(12); R(13); R(14); R(15); } // Add the working vars back into state[]. state[0] += a(0); state[1] += b(0); state[2] += c(0); state[3] += d(0); state[4] += e(0); state[5] += f(0); state[6] += g(0); state[7] += h(0); } static void process(lzma_check_state *check) { #ifdef WORDS_BIGENDIAN transform(check->state.sha256.state, check->buffer.u32); #else uint32_t data[16]; for (size_t i = 0; i < 16; ++i) data[i] = bswap32(check->buffer.u32[i]); transform(check->state.sha256.state, data); #endif return; } extern void lzma_sha256_init(lzma_check_state *check) { static const uint32_t s[8] = { 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19, }; memcpy(check->state.sha256.state, s, sizeof(s)); check->state.sha256.size = 0; return; } extern void lzma_sha256_update(const uint8_t *buf, size_t size, lzma_check_state *check) { // Copy the input data into a properly aligned temporary buffer. // This way we can be called with arbitrarily sized buffers // (no need to be multiple of 64 bytes), and the code works also // on architectures that don't allow unaligned memory access. 
while (size > 0) { const size_t copy_start = check->state.sha256.size & 0x3F; size_t copy_size = 64 - copy_start; if (copy_size > size) copy_size = size; memcpy(check->buffer.u8 + copy_start, buf, copy_size); buf += copy_size; size -= copy_size; check->state.sha256.size += copy_size; if ((check->state.sha256.size & 0x3F) == 0) process(check); } return; } extern void lzma_sha256_finish(lzma_check_state *check) { // Add padding as described in RFC 3174 (it describes SHA-1 but // the same padding style is used for SHA-256 too). size_t pos = check->state.sha256.size & 0x3F; check->buffer.u8[pos++] = 0x80; while (pos != 64 - 8) { if (pos == 64) { process(check); pos = 0; } check->buffer.u8[pos++] = 0x00; } // Convert the message size from bytes to bits. check->state.sha256.size *= 8; check->buffer.u64[(64 - 8) / 8] = conv64be(check->state.sha256.size); process(check); for (size_t i = 0; i < 8; ++i) check->buffer.u32[i] = conv32be(check->state.sha256.state[i]); return; } diff --git a/src/liblzma/common/alone_decoder.c b/src/liblzma/common/alone_decoder.c index 678c79e65433..c25112e6875f 100644 --- a/src/liblzma/common/alone_decoder.c +++ b/src/liblzma/common/alone_decoder.c @@ -1,232 +1,238 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file alone_decoder.c /// \brief Decoder for LZMA_Alone files // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "alone_decoder.h" #include "lzma_decoder.h" #include "lz_decoder.h" struct lzma_coder_s { lzma_next_coder next; enum { SEQ_PROPERTIES, SEQ_DICTIONARY_SIZE, SEQ_UNCOMPRESSED_SIZE, SEQ_CODER_INIT, SEQ_CODE, } sequence; + /// If true, reject files that are unlikely to be .lzma files. + /// If false, more non-.lzma files get accepted and will give + /// LZMA_DATA_ERROR either immediately or after a few output bytes. 
+ bool picky; + /// Position in the header fields size_t pos; /// Uncompressed size decoded from the header lzma_vli uncompressed_size; /// Memory usage limit uint64_t memlimit; /// Amount of memory actually needed (only an estimate) uint64_t memusage; /// Options decoded from the header needed to initialize /// the LZMA decoder lzma_options_lzma options; }; static lzma_ret alone_decode(lzma_coder *coder, lzma_allocator *allocator lzma_attribute((__unused__)), const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { while (*out_pos < out_size && (coder->sequence == SEQ_CODE || *in_pos < in_size)) switch (coder->sequence) { case SEQ_PROPERTIES: if (lzma_lzma_lclppb_decode(&coder->options, in[*in_pos])) return LZMA_FORMAT_ERROR; coder->sequence = SEQ_DICTIONARY_SIZE; ++*in_pos; break; case SEQ_DICTIONARY_SIZE: coder->options.dict_size |= (size_t)(in[*in_pos]) << (coder->pos * 8); if (++coder->pos == 4) { - if (coder->options.dict_size != UINT32_MAX) { + if (coder->picky && coder->options.dict_size + != UINT32_MAX) { // A hack to ditch tons of false positives: // We allow only dictionary sizes that are // 2^n or 2^n + 2^(n-1). LZMA_Alone created // only files with 2^n, but accepts any - // dictionary size. If someone complains, this - // will be reconsidered. + // dictionary size. uint32_t d = coder->options.dict_size - 1; d |= d >> 2; d |= d >> 3; d |= d >> 4; d |= d >> 8; d |= d >> 16; ++d; if (d != coder->options.dict_size) return LZMA_FORMAT_ERROR; } coder->pos = 0; coder->sequence = SEQ_UNCOMPRESSED_SIZE; } ++*in_pos; break; case SEQ_UNCOMPRESSED_SIZE: coder->uncompressed_size |= (lzma_vli)(in[*in_pos]) << (coder->pos * 8); ++*in_pos; if (++coder->pos < 8) break; // Another hack to ditch false positives: Assume that // if the uncompressed size is known, it must be less - // than 256 GiB. Again, if someone complains, this - // will be reconsidered. 
- if (coder->uncompressed_size != LZMA_VLI_UNKNOWN + // than 256 GiB. + if (coder->picky + && coder->uncompressed_size != LZMA_VLI_UNKNOWN && coder->uncompressed_size >= (LZMA_VLI_C(1) << 38)) return LZMA_FORMAT_ERROR; // Calculate the memory usage so that it is ready // for SEQ_CODER_INIT. coder->memusage = lzma_lzma_decoder_memusage(&coder->options) + LZMA_MEMUSAGE_BASE; coder->pos = 0; coder->sequence = SEQ_CODER_INIT; // Fall through case SEQ_CODER_INIT: { if (coder->memusage > coder->memlimit) return LZMA_MEMLIMIT_ERROR; lzma_filter_info filters[2] = { { .init = &lzma_lzma_decoder_init, .options = &coder->options, }, { .init = NULL, } }; const lzma_ret ret = lzma_next_filter_init(&coder->next, allocator, filters); if (ret != LZMA_OK) return ret; // Use a hack to set the uncompressed size. lzma_lz_decoder_uncompressed(coder->next.coder, coder->uncompressed_size); coder->sequence = SEQ_CODE; break; } case SEQ_CODE: { return coder->next.code(coder->next.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); } default: return LZMA_PROG_ERROR; } return LZMA_OK; } static void alone_decoder_end(lzma_coder *coder, lzma_allocator *allocator) { lzma_next_end(&coder->next, allocator); lzma_free(coder, allocator); return; } static lzma_ret alone_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit) { *memusage = coder->memusage; *old_memlimit = coder->memlimit; if (new_memlimit != 0) { if (new_memlimit < coder->memusage) return LZMA_MEMLIMIT_ERROR; coder->memlimit = new_memlimit; } return LZMA_OK; } extern lzma_ret lzma_alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - uint64_t memlimit) + uint64_t memlimit, bool picky) { lzma_next_coder_init(&lzma_alone_decoder_init, next, allocator); if (memlimit == 0) return LZMA_PROG_ERROR; if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); if (next->coder == NULL) return LZMA_MEM_ERROR; next->code = &alone_decode; 
next->end = &alone_decoder_end; next->memconfig = &alone_decoder_memconfig; next->coder->next = LZMA_NEXT_CODER_INIT; } next->coder->sequence = SEQ_PROPERTIES; + next->coder->picky = picky; next->coder->pos = 0; next->coder->options.dict_size = 0; next->coder->options.preset_dict = NULL; next->coder->options.preset_dict_size = 0; next->coder->uncompressed_size = 0; next->coder->memlimit = memlimit; next->coder->memusage = LZMA_MEMUSAGE_BASE; return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_alone_decoder(lzma_stream *strm, uint64_t memlimit) { - lzma_next_strm_init(lzma_alone_decoder_init, strm, memlimit); + lzma_next_strm_init(lzma_alone_decoder_init, strm, memlimit, false); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } diff --git a/src/liblzma/common/alone_decoder.h b/src/liblzma/common/alone_decoder.h index 70d0d2a0ffa4..f666fc3823e1 100644 --- a/src/liblzma/common/alone_decoder.h +++ b/src/liblzma/common/alone_decoder.h @@ -1,22 +1,23 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file alone_decoder.h /// \brief Decoder for LZMA_Alone files // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. 
// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_ALONE_DECODER_H #define LZMA_ALONE_DECODER_H #include "common.h" -extern lzma_ret lzma_alone_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, uint64_t memlimit); +extern lzma_ret lzma_alone_decoder_init( + lzma_next_coder *next, lzma_allocator *allocator, + uint64_t memlimit, bool picky); #endif diff --git a/src/liblzma/common/auto_decoder.c b/src/liblzma/common/auto_decoder.c index ae6c3e78642c..35c895fd14c2 100644 --- a/src/liblzma/common/auto_decoder.c +++ b/src/liblzma/common/auto_decoder.c @@ -1,186 +1,186 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file auto_decoder.c /// \brief Autodetect between .xz Stream and .lzma (LZMA_Alone) formats // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "stream_decoder.h" #include "alone_decoder.h" struct lzma_coder_s { /// Stream decoder or LZMA_Alone decoder lzma_next_coder next; uint64_t memlimit; uint32_t flags; enum { SEQ_INIT, SEQ_CODE, SEQ_FINISH, } sequence; }; static lzma_ret auto_decode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { switch (coder->sequence) { case SEQ_INIT: if (*in_pos >= in_size) return LZMA_OK; // Update the sequence now, because we want to continue from // SEQ_CODE even if we return some LZMA_*_CHECK. coder->sequence = SEQ_CODE; // Detect the file format. For now this is simple, since if // it doesn't start with 0xFD (the first magic byte of the // new format), it has to be LZMA_Alone, or something that // we don't support at all. 
if (in[*in_pos] == 0xFD) { return_if_error(lzma_stream_decoder_init( &coder->next, allocator, coder->memlimit, coder->flags)); } else { return_if_error(lzma_alone_decoder_init(&coder->next, - allocator, coder->memlimit)); + allocator, coder->memlimit, true)); // If the application wants to know about missing // integrity check or about the check in general, we // need to handle it here, because LZMA_Alone decoder // doesn't accept any flags. if (coder->flags & LZMA_TELL_NO_CHECK) return LZMA_NO_CHECK; if (coder->flags & LZMA_TELL_ANY_CHECK) return LZMA_GET_CHECK; } // Fall through case SEQ_CODE: { const lzma_ret ret = coder->next.code( coder->next.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); if (ret != LZMA_STREAM_END || (coder->flags & LZMA_CONCATENATED) == 0) return ret; coder->sequence = SEQ_FINISH; } // Fall through case SEQ_FINISH: // When LZMA_CONCATENATED was used and we were decoding // LZMA_Alone file, we need to check that there is no // trailing garbage and wait for LZMA_FINISH. if (*in_pos < in_size) return LZMA_DATA_ERROR; return action == LZMA_FINISH ? LZMA_STREAM_END : LZMA_OK; default: assert(0); return LZMA_PROG_ERROR; } } static void auto_decoder_end(lzma_coder *coder, lzma_allocator *allocator) { lzma_next_end(&coder->next, allocator); lzma_free(coder, allocator); return; } static lzma_check auto_decoder_get_check(const lzma_coder *coder) { // It is LZMA_Alone if get_check is NULL. return coder->next.get_check == NULL ? LZMA_CHECK_NONE : coder->next.get_check(coder->next.coder); } static lzma_ret auto_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, uint64_t *old_memlimit, uint64_t new_memlimit) { lzma_ret ret; if (coder->next.memconfig != NULL) { ret = coder->next.memconfig(coder->next.coder, memusage, old_memlimit, new_memlimit); assert(*old_memlimit == coder->memlimit); } else { // No coder is configured yet. Use the base value as // the current memory usage.
*memusage = LZMA_MEMUSAGE_BASE; *old_memlimit = coder->memlimit; ret = LZMA_OK; } if (ret == LZMA_OK && new_memlimit != 0) coder->memlimit = new_memlimit; return ret; } static lzma_ret auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, uint64_t memlimit, uint32_t flags) { lzma_next_coder_init(&auto_decoder_init, next, allocator); if (memlimit == 0) return LZMA_PROG_ERROR; if (flags & ~LZMA_SUPPORTED_FLAGS) return LZMA_OPTIONS_ERROR; if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); if (next->coder == NULL) return LZMA_MEM_ERROR; next->code = &auto_decode; next->end = &auto_decoder_end; next->get_check = &auto_decoder_get_check; next->memconfig = &auto_decoder_memconfig; next->coder->next = LZMA_NEXT_CODER_INIT; } next->coder->memlimit = memlimit; next->coder->flags = flags; next->coder->sequence = SEQ_INIT; return LZMA_OK; } extern LZMA_API(lzma_ret) lzma_auto_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags) { lzma_next_strm_init(auto_decoder_init, strm, memlimit, flags); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; } diff --git a/src/lzmainfo/lzmainfo.1 b/src/lzmainfo/lzmainfo.1 index f2b93b4c643f..ce38eee50324 100644 --- a/src/lzmainfo/lzmainfo.1 +++ b/src/lzmainfo/lzmainfo.1 @@ -1,60 +1,60 @@ .\" .\" Author: Lasse Collin .\" .\" This file has been put into the public domain. .\" You can do whatever you want with this file. .\" -.TH LZMAINFO 1 "2010-09-27" "Tukaani" "XZ Utils" +.TH LZMAINFO 1 "2013-06-30" "Tukaani" "XZ Utils" .SH NAME lzmainfo \- show information stored in the .lzma file header .SH SYNOPSIS .B lzmainfo .RB [ \-\-help ] .RB [ \-\-version ] -.RI [ file ]... +.RI [ file... ] .SH DESCRIPTION .B lzmainfo shows information stored in the .B .lzma file header. It reads the first 13 bytes from the specified .IR file , decodes the header, and prints it to standard output in human readable format. 
If no .I files are given or .I file is .BR \- , standard input is read. .PP Usually the most interesting information is the uncompressed size and the dictionary size. Uncompressed size can be shown only if the file is in the non-streamed .B .lzma format variant. The amount of memory required to decompress the file is a few dozen kilobytes plus the dictionary size. .PP .B lzmainfo is included in XZ Utils primarily for backward compatibility with LZMA Utils. .SH "EXIT STATUS" .TP .B 0 All is good. .TP .B 1 An error occurred. .SH BUGS .B lzmainfo uses .B MB while the correct suffix would be .B MiB (2^20 bytes). This is to keep the output compatible with LZMA Utils. .SH "SEE ALSO" .BR xz (1) diff --git a/src/xz/coder.c b/src/xz/coder.c index 69b1225d946d..e0867e2aa295 100644 --- a/src/xz/coder.c +++ b/src/xz/coder.c @@ -1,666 +1,673 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file coder.c /// \brief Compresses or uncompresses a file // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "private.h" /// Return value type for coder_init(). enum coder_init_ret { CODER_INIT_NORMAL, CODER_INIT_PASSTHRU, CODER_INIT_ERROR, }; enum operation_mode opt_mode = MODE_COMPRESS; enum format_type opt_format = FORMAT_AUTO; bool opt_auto_adjust = true; /// Stream used to communicate with liblzma static lzma_stream strm = LZMA_STREAM_INIT; /// Filters needed for encoding in all formats, and also for decoding raw data static lzma_filter filters[LZMA_FILTERS_MAX + 1]; /// Input and output buffers static io_buf in_buf; static io_buf out_buf; /// Number of filters. Zero indicates that we are using a preset.
-static size_t filters_count = 0; +static uint32_t filters_count = 0; /// Number of the preset (0-9) -static size_t preset_number = 6; - -/// If a preset is used (no custom filter chain) and preset_extreme is true, -/// a significantly slower compression is used to achieve slightly better -/// compression ratio. -static bool preset_extreme = false; +static uint32_t preset_number = LZMA_PRESET_DEFAULT; /// Integrity check type static lzma_check check; /// This becomes false if the --check=CHECK option is used. static bool check_default = true; extern void coder_set_check(lzma_check new_check) { check = new_check; check_default = false; return; } -extern void -coder_set_preset(size_t new_preset) +static void +forget_filter_chain(void) { - preset_number = new_preset; - // Setting a preset makes us forget a possibly defined custom // filter chain. while (filters_count > 0) { --filters_count; free(filters[filters_count].options); filters[filters_count].options = NULL; } return; } +extern void +coder_set_preset(uint32_t new_preset) +{ + preset_number &= ~LZMA_PRESET_LEVEL_MASK; + preset_number |= new_preset; + forget_filter_chain(); + return; +} + + extern void coder_set_extreme(void) { - preset_extreme = true; + preset_number |= LZMA_PRESET_EXTREME; + forget_filter_chain(); return; } extern void coder_add_filter(lzma_vli id, void *options) { if (filters_count == LZMA_FILTERS_MAX) message_fatal(_("Maximum number of filters is four")); filters[filters_count].id = id; filters[filters_count].options = options; ++filters_count; + // Setting a custom filter chain makes us forget the preset options. + // This makes a difference if one specifies e.g. "xz -9 --lzma2 -e" + // where the custom filter chain resets the preset level back to + // the default 6, making the example equivalent to "xz -6e". 
+ preset_number = LZMA_PRESET_DEFAULT; + return; } static void lzma_attribute((__noreturn__)) memlimit_too_small(uint64_t memory_usage) { message(V_ERROR, _("Memory usage limit is too low for the given " "filter setup.")); message_mem_needed(V_ERROR, memory_usage); tuklib_exit(E_ERROR, E_ERROR, false); } extern void coder_set_compression_settings(void) { // Options for LZMA1 or LZMA2 in case we are using a preset. static lzma_options_lzma opt_lzma; if (filters_count == 0) { // We are using a preset. This is not a good idea in raw mode // except when playing around with things. Different versions // of this software may use different options in presets, and // thus make uncompressing the raw data difficult. if (opt_format == FORMAT_RAW) { // The message is shown only if warnings are allowed // but the exit status isn't changed. message(V_WARNING, _("Using a preset in raw mode " "is discouraged.")); message(V_WARNING, _("The exact options of the " "presets may vary between software " "versions.")); } // Get the preset for LZMA1 or LZMA2. - if (preset_extreme) - preset_number |= LZMA_PRESET_EXTREME; - if (lzma_lzma_preset(&opt_lzma, preset_number)) message_bug(); // Use LZMA2 except with --format=lzma we use LZMA1. filters[0].id = opt_format == FORMAT_LZMA ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; filters[0].options = &opt_lzma; filters_count = 1; } // Terminate the filter options array. filters[filters_count].id = LZMA_VLI_UNKNOWN; // If we are using the .lzma format, allow exactly one filter // which has to be LZMA1. if (opt_format == FORMAT_LZMA && (filters_count != 1 || filters[0].id != LZMA_FILTER_LZMA1)) message_fatal(_("The .lzma format supports only " "the LZMA1 filter")); // If we are using the .xz format, make sure that there is no LZMA1 // filter to prevent LZMA_PROG_ERROR. 
if (opt_format == FORMAT_XZ) for (size_t i = 0; i < filters_count; ++i) if (filters[i].id == LZMA_FILTER_LZMA1) message_fatal(_("LZMA1 cannot be used " "with the .xz format")); // Print the selected filter chain. message_filters_show(V_DEBUG, filters); // If using --format=raw, we can be decoding. The memusage function // also validates the filter chain and the options used for the // filters. const uint64_t memory_limit = hardware_memlimit_get(opt_mode); uint64_t memory_usage; if (opt_mode == MODE_COMPRESS) memory_usage = lzma_raw_encoder_memusage(filters); else memory_usage = lzma_raw_decoder_memusage(filters); if (memory_usage == UINT64_MAX) message_fatal(_("Unsupported filter chain or filter options")); // Print memory usage info before possible dictionary // size auto-adjusting. message_mem_needed(V_DEBUG, memory_usage); if (opt_mode == MODE_COMPRESS) { const uint64_t decmem = lzma_raw_decoder_memusage(filters); if (decmem != UINT64_MAX) message(V_DEBUG, _("Decompression will need " "%s MiB of memory."), uint64_to_str( round_up_to_mib(decmem), 0)); } if (memory_usage > memory_limit) { // If --no-auto-adjust was used or we didn't find LZMA1 or // LZMA2 as the last filter, give an error immediately. // --format=raw implies --no-auto-adjust. if (!opt_auto_adjust || opt_format == FORMAT_RAW) memlimit_too_small(memory_usage); assert(opt_mode == MODE_COMPRESS); // Look for the last filter if it is LZMA2 or LZMA1, so // we can make it use less RAM. With other filters we don't // know what to do. size_t i = 0; while (filters[i].id != LZMA_FILTER_LZMA2 && filters[i].id != LZMA_FILTER_LZMA1) { if (filters[i].id == LZMA_VLI_UNKNOWN) memlimit_too_small(memory_usage); ++i; } // Decrease the dictionary size until we meet the memory // usage limit. First round down to full mebibytes. 
lzma_options_lzma *opt = filters[i].options; const uint32_t orig_dict_size = opt->dict_size; opt->dict_size &= ~((UINT32_C(1) << 20) - 1); while (true) { // If it is below 1 MiB, auto-adjusting failed. We // could be more sophisticated and scale it down even // more, but let's see if many complain about this // version. // // FIXME: Displays the scaled memory usage instead // of the original. if (opt->dict_size < (UINT32_C(1) << 20)) memlimit_too_small(memory_usage); memory_usage = lzma_raw_encoder_memusage(filters); if (memory_usage == UINT64_MAX) message_bug(); // Accept it if it is low enough. if (memory_usage <= memory_limit) break; // Otherwise 1 MiB down and try again. I hope this // isn't too slow method for cases where the original // dict_size is very big. opt->dict_size -= UINT32_C(1) << 20; } // Tell the user that we decreased the dictionary size. message(V_WARNING, _("Adjusted LZMA%c dictionary size " "from %s MiB to %s MiB to not exceed " "the memory usage limit of %s MiB"), filters[i].id == LZMA_FILTER_LZMA2 ? '2' : '1', uint64_to_str(orig_dict_size >> 20, 0), uint64_to_str(opt->dict_size >> 20, 1), uint64_to_str(round_up_to_mib( memory_limit), 2)); } /* // Limit the number of worker threads so that memory usage // limit isn't exceeded. assert(memory_usage > 0); size_t thread_limit = memory_limit / memory_usage; if (thread_limit == 0) thread_limit = 1; if (opt_threads > thread_limit) opt_threads = thread_limit; */ if (check_default) { // The default check type is CRC64, but fallback to CRC32 // if CRC64 isn't supported by the copy of liblzma we are // using. CRC32 is always supported. check = LZMA_CHECK_CRC64; if (!lzma_check_is_supported(check)) check = LZMA_CHECK_CRC32; } return; } /// Return true if the data in in_buf seems to be in the .xz format. static bool is_format_xz(void) { // Specify the magic as hex to be compatible with EBCDIC systems. 
static const uint8_t magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 }; return strm.avail_in >= sizeof(magic) && memcmp(in_buf.u8, magic, sizeof(magic)) == 0; } /// Return true if the data in in_buf seems to be in the .lzma format. /// This is only a heuristic: the first 13 bytes of in_buf are checked /// for decodable LZMA1 properties, a plausible dictionary size, and /// a plausible uncompressed size. static bool is_format_lzma(void) { // The .lzma header is 13 bytes. if (strm.avail_in < 13) return false; // Decode the LZMA1 properties from the first five header bytes. lzma_filter filter = { .id = LZMA_FILTER_LZMA1 }; if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK) return false; // A hack to ditch tons of false positives: We allow only dictionary // sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone // created only files with 2^n, but accepts any dictionary size. // If someone complains, this will be reconsidered. // // Only dict_size is needed from the decoded options, so the // structure is freed right after reading it. lzma_options_lzma *opt = filter.options; const uint32_t dict_size = opt->dict_size; free(opt); if (dict_size != UINT32_MAX) { uint32_t d = dict_size - 1; d |= d >> 2; d |= d >> 3; d |= d >> 4; d |= d >> 8; d |= d >> 16; ++d; if (d != dict_size || dict_size == 0) return false; } // Another hack to ditch false positives: Assume that if the // uncompressed size is known, it must be at most 256 GiB. // Again, if someone complains, this will be reconsidered. // // The size is read as a little-endian 64-bit integer from header // bytes 5-12; UINT64_MAX is exempt from the limit. uint64_t uncompressed_size = 0; for (size_t i = 0; i < 8; ++i) uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8); if (uncompressed_size != UINT64_MAX && uncompressed_size > (UINT64_C(1) << 38)) return false; return true; } /// Detect the input file type (for now, this is done only when decompressing), /// and initialize an appropriate coder. Return value indicates if a normal /// liblzma-based coder was initialized (CODER_INIT_NORMAL), if passthru /// mode should be used (CODER_INIT_PASSTHRU), or if an error occurred /// (CODER_INIT_ERROR). static enum coder_init_ret coder_init(file_pair *pair) { lzma_ret ret = LZMA_PROG_ERROR; if (opt_mode == MODE_COMPRESS) { switch (opt_format) { case FORMAT_AUTO: // args.c ensures this.
assert(0); break; case FORMAT_XZ: ret = lzma_stream_encoder(&strm, filters, check); break; case FORMAT_LZMA: ret = lzma_alone_encoder(&strm, filters[0].options); break; case FORMAT_RAW: ret = lzma_raw_encoder(&strm, filters); break; } } else { const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK | LZMA_CONCATENATED; // We abuse FORMAT_AUTO to indicate unknown file format, // for which we may consider passthru mode. enum format_type init_format = FORMAT_AUTO; switch (opt_format) { case FORMAT_AUTO: if (is_format_xz()) init_format = FORMAT_XZ; else if (is_format_lzma()) init_format = FORMAT_LZMA; break; case FORMAT_XZ: if (is_format_xz()) init_format = FORMAT_XZ; break; case FORMAT_LZMA: if (is_format_lzma()) init_format = FORMAT_LZMA; break; case FORMAT_RAW: init_format = FORMAT_RAW; break; } switch (init_format) { case FORMAT_AUTO: // Unknown file format. If --decompress --stdout // --force have been given, then we copy the input // as is to stdout. Checking for MODE_DECOMPRESS // is needed, because we don't want to use // passthru mode with --test. if (opt_mode == MODE_DECOMPRESS && opt_stdout && opt_force) return CODER_INIT_PASSTHRU; ret = LZMA_FORMAT_ERROR; break; case FORMAT_XZ: ret = lzma_stream_decoder(&strm, hardware_memlimit_get( MODE_DECOMPRESS), flags); break; case FORMAT_LZMA: ret = lzma_alone_decoder(&strm, hardware_memlimit_get( MODE_DECOMPRESS)); break; case FORMAT_RAW: // Memory usage has already been checked in // coder_set_compression_settings(). ret = lzma_raw_decoder(&strm, filters); break; } // Try to decode the headers. This will catch a too low // memory usage limit in case it happens in the first // Block of the first Stream, which is where it very // probably will happen if it is going to happen.
if (ret == LZMA_OK && init_format != FORMAT_RAW) { strm.next_out = NULL; strm.avail_out = 0; ret = lzma_code(&strm, LZMA_RUN); } } if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); if (ret == LZMA_MEMLIMIT_ERROR) message_mem_needed(V_ERROR, lzma_memusage(&strm)); return CODER_INIT_ERROR; } return CODER_INIT_NORMAL; } /// Compress or decompress using liblzma. static bool coder_normal(file_pair *pair) { // Encoder needs to know when we have given all the input to it. // The decoders need to know it too when we are using // LZMA_CONCATENATED. We need to check for src_eof here, because // the first input chunk has been already read, and that may // have been the only chunk we will read. lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN; lzma_ret ret; // Assume that something goes wrong. bool success = false; strm.next_out = out_buf.u8; strm.avail_out = IO_BUFFER_SIZE; while (!user_abort) { // Fill the input buffer if it is empty and we haven't reached // end of file yet. if (strm.avail_in == 0 && !pair->src_eof) { strm.next_in = in_buf.u8; strm.avail_in = io_read( pair, &in_buf, IO_BUFFER_SIZE); if (strm.avail_in == SIZE_MAX) break; if (pair->src_eof) action = LZMA_FINISH; } // Let liblzma do the actual work. ret = lzma_code(&strm, action); // Write out if the output buffer became full. if (strm.avail_out == 0) { if (opt_mode != MODE_TEST && io_write(pair, &out_buf, IO_BUFFER_SIZE - strm.avail_out)) break; strm.next_out = out_buf.u8; strm.avail_out = IO_BUFFER_SIZE; } if (ret != LZMA_OK) { // Determine if the return value indicates that we // won't continue coding. const bool stop = ret != LZMA_NO_CHECK && ret != LZMA_UNSUPPORTED_CHECK; if (stop) { // Write the remaining bytes even if something // went wrong, because that way the user gets // as much data as possible, which can be good // when trying to get at least some useful // data out of damaged files.
if (opt_mode != MODE_TEST && io_write(pair, &out_buf, IO_BUFFER_SIZE - strm.avail_out)) break; } if (ret == LZMA_STREAM_END) { // Check that there is no trailing garbage. // This is needed for LZMA_Alone and raw // streams. if (strm.avail_in == 0 && !pair->src_eof) { // Try reading one more byte. // Hopefully we don't get any more // input, and thus pair->src_eof // becomes true. strm.avail_in = io_read( pair, &in_buf, 1); if (strm.avail_in == SIZE_MAX) break; assert(strm.avail_in == 0 || strm.avail_in == 1); } if (strm.avail_in == 0) { assert(pair->src_eof); success = true; break; } // We hadn't reached the end of the file. ret = LZMA_DATA_ERROR; assert(stop); } // If we get here and stop is true, something went // wrong and we print an error. Otherwise it's just // a warning and coding can continue. if (stop) { message_error("%s: %s", pair->src_name, message_strm(ret)); } else { message_warning("%s: %s", pair->src_name, message_strm(ret)); // When compressing, all possible errors set // stop to true. assert(opt_mode != MODE_COMPRESS); } if (ret == LZMA_MEMLIMIT_ERROR) { // Display how much memory it would have // actually needed. message_mem_needed(V_ERROR, lzma_memusage(&strm)); } if (stop) break; } // Show progress information under certain conditions. message_progress_update(); } return success; } /// Copy from input file to output file without processing the data in any /// way. This is used only when trying to decompress unrecognized files /// with --decompress --stdout --force, so the output is always stdout. 
static bool coder_passthru(file_pair *pair) { while (strm.avail_in != 0) { if (user_abort) return false; if (io_write(pair, &in_buf, strm.avail_in)) return false; strm.total_in += strm.avail_in; strm.total_out = strm.total_in; message_progress_update(); strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); if (strm.avail_in == SIZE_MAX) return false; } return true; } extern void coder_run(const char *filename) { // Set and possibly print the filename for the progress message. message_filename(filename); // Try to open the input file. file_pair *pair = io_open_src(filename); if (pair == NULL) return; // Assume that something goes wrong. bool success = false; // Read the first chunk of input data. This is needed to detect // the input file type (for now, only for decompression). strm.next_in = in_buf.u8; strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); if (strm.avail_in != SIZE_MAX) { // Initialize the coder. This will detect the file format // and, in decompression or testing mode, check the memory // usage of the first Block too. This way we don't try to // open the destination file if we see that coding wouldn't // work at all anyway. This also avoids deleting the old // "target" file if --force was used. const enum coder_init_ret init_ret = coder_init(pair); if (init_ret != CODER_INIT_ERROR && !user_abort) { // Don't open the destination file when --test // is used. if (opt_mode == MODE_TEST || !io_open_dest(pair)) { // Initialize the progress indicator. const uint64_t in_size = pair->src_st.st_size <= 0 ? 0 : pair->src_st.st_size; message_progress_start(&strm, in_size); // Do the actual coding or passthru. if (init_ret == CODER_INIT_NORMAL) success = coder_normal(pair); else success = coder_passthru(pair); message_progress_end(success); } } } // Close the file pair. It needs to know if coding was successful to // know if the source or target file should be unlinked. 
io_close(pair, success); return; } diff --git a/src/xz/coder.h b/src/xz/coder.h index 4626466f39ce..2d3add972745 100644 --- a/src/xz/coder.h +++ b/src/xz/coder.h @@ -1,61 +1,61 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file coder.h /// \brief Compresses or uncompresses a file // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// enum operation_mode { MODE_COMPRESS, MODE_DECOMPRESS, MODE_TEST, MODE_LIST, }; // NOTE: The order of these is significant in suffix.c. enum format_type { FORMAT_AUTO, FORMAT_XZ, FORMAT_LZMA, // HEADER_GZIP, FORMAT_RAW, }; /// Operation mode of the command line tool. This is set in args.c and read /// in several files. extern enum operation_mode opt_mode; /// File format to use when encoding or what format(s) to accept when /// decoding. This is a global because it's needed also in suffix.c. /// This is set in args.c. extern enum format_type opt_format; /// If true, the compression settings are automatically adjusted down if /// they exceed the memory usage limit. 
extern bool opt_auto_adjust; /// Set the integrity check type used when compressing extern void coder_set_check(lzma_check check); /// Set preset number -extern void coder_set_preset(size_t new_preset); +extern void coder_set_preset(uint32_t new_preset); /// Enable extreme mode extern void coder_set_extreme(void); /// Add a filter to the custom filter chain extern void coder_add_filter(lzma_vli id, void *options); /// extern void coder_set_compression_settings(void); /// Compress or decompress the given file extern void coder_run(const char *filename); diff --git a/src/xz/file_io.c b/src/xz/file_io.c index f9b7f305e09c..871a099b6193 100644 --- a/src/xz/file_io.c +++ b/src/xz/file_io.c @@ -1,957 +1,955 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file file_io.c /// \brief File opening, unlinking, and closing // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "private.h" #include #ifdef TUKLIB_DOSLIKE # include #else static bool warn_fchown; #endif #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) # include #elif defined(HAVE_UTIME) # include #endif #include "tuklib_open_stdxxx.h" #ifndef O_BINARY # define O_BINARY 0 #endif #ifndef O_NOCTTY # define O_NOCTTY 0 #endif /// If true, try to create sparse files when decompressing. static bool try_sparse = true; #ifndef TUKLIB_DOSLIKE -/// File status flags of standard output. This is used by io_open_dest() -/// and io_close_dest(). -static int stdout_flags = 0; +/// Original file status flags of standard output. This is used by +/// io_open_dest() and io_close_dest() to save and restore the flags. 
+static int stdout_flags; +static bool restore_stdout_flags = false; #endif static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); extern void io_init(void) { // Make sure that stdin, stdout, and stderr are connected to // a valid file descriptor. Exit immediately with exit code ERROR // if we cannot make the file descriptors valid. Maybe we should // print an error message, but our stderr could be screwed anyway. tuklib_open_stdxxx(E_ERROR); #ifndef TUKLIB_DOSLIKE // If fchown() fails setting the owner, we warn about it only if // we are root. warn_fchown = geteuid() == 0; #endif #ifdef __DJGPP__ // Avoid doing useless things when statting files. // This isn't important but doesn't hurt. _djstat_flags = _STAT_INODE | _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; #endif return; } extern void io_no_sparse(void) { try_sparse = false; return; } /// \brief Unlink a file /// /// This tries to verify that the file being unlinked really is the file that /// we want to unlink by verifying device and inode numbers. There's still /// a small unavoidable race, but this is much better than nothing (the file /// could have been moved/replaced even hours earlier). static void io_unlink(const char *name, const struct stat *known_st) { #if defined(TUKLIB_DOSLIKE) // On DOS-like systems, st_ino is meaningless, so don't bother // testing it. Just silence a compiler warning. (void)known_st; #else struct stat new_st; // If --force was used, use stat() instead of lstat(). This way // (de)compressing symlinks works correctly. However, it also means // that xz cannot detect if a regular file foo is renamed to bar // and then a symlink foo -> bar is created. Because of stat() // instead of lstat(), xz will think that foo hasn't been replaced // with another file. Thus, xz will remove foo even though it no // longer is the same file that xz used when it started compressing. // Probably it's not too bad though, so this doesn't need a more // complex fix. 
const int stat_ret = opt_force ? stat(name, &new_st) : lstat(name, &new_st); if (stat_ret # ifdef __VMS // st_ino is an array, and we don't want to // compare st_dev at all. || memcmp(&new_st.st_ino, &known_st->st_ino, sizeof(new_st.st_ino)) != 0 # else // Typical POSIX-like system || new_st.st_dev != known_st->st_dev || new_st.st_ino != known_st->st_ino # endif ) // TRANSLATORS: When compression or decompression finishes, // and xz is going to remove the source file, xz first checks // if the source file still exists, and if it does, does its // device and inode numbers match what xz saw when it opened // the source file. If these checks fail, this message is // shown, %s being the filename, and the file is not deleted. // The check for device and inode numbers is there, because // it is possible that the user has put a new file in place // of the original file, and in that case it obviously // shouldn't be removed. message_error(_("%s: File seems to have been moved, " "not removing"), name); else #endif // There's a race condition between lstat() and unlink() // but at least we have tried to avoid removing wrong file. if (unlink(name)) message_error(_("%s: Cannot remove: %s"), name, strerror(errno)); return; } /// \brief Copies owner/group and permissions /// /// \todo ACL and EA support /// static void io_copy_attrs(const file_pair *pair) { // Skip chown and chmod on Windows. #ifndef TUKLIB_DOSLIKE // This function is more tricky than you may think at first. // Blindly copying permissions may permit users to access the // destination file who didn't have permission to access the // source file. // Try changing the owner of the file. If we aren't root or the owner // isn't already us, fchown() probably doesn't succeed. We warn // about failing fchown() only if we are root. 
if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) && warn_fchown) message_warning(_("%s: Cannot set the file owner: %s"), pair->dest_name, strerror(errno)); mode_t mode; if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) { message_warning(_("%s: Cannot set the file group: %s"), pair->dest_name, strerror(errno)); // We can still safely copy some additional permissions: // `group' must be at least as strict as `other' and // also vice versa. // // NOTE: After this, the owner of the source file may // get additional permissions. This shouldn't be too bad, // because the owner would have had permission to chmod // the original file anyway. mode = ((pair->src_st.st_mode & 0070) >> 3) & (pair->src_st.st_mode & 0007); mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; } else { // Drop the setuid, setgid, and sticky bits. mode = pair->src_st.st_mode & 0777; } if (fchmod(pair->dest_fd, mode)) message_warning(_("%s: Cannot set the file permissions: %s"), pair->dest_name, strerror(errno)); #endif // Copy the timestamps. We have several possible ways to do this, of // which some are better in both security and precision. // // First, get the nanosecond part of the timestamps. As of writing, // it's not standardized by POSIX, and there are several names for // the same thing in struct stat. 
long atime_nsec; long mtime_nsec; # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) // GNU and Solaris atime_nsec = pair->src_st.st_atim.tv_nsec; mtime_nsec = pair->src_st.st_mtim.tv_nsec; # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) // BSD atime_nsec = pair->src_st.st_atimespec.tv_nsec; mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) // GNU and BSD without extensions atime_nsec = pair->src_st.st_atimensec; mtime_nsec = pair->src_st.st_mtimensec; # elif defined(HAVE_STRUCT_STAT_ST_UATIME) // Tru64 atime_nsec = pair->src_st.st_uatime * 1000; mtime_nsec = pair->src_st.st_umtime * 1000; # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) // UnixWare atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; # else // Safe fallback atime_nsec = 0; mtime_nsec = 0; # endif // Construct a structure to hold the timestamps and call appropriate // function to set the timestamps. #if defined(HAVE_FUTIMENS) // Use nanosecond precision. struct timespec tv[2]; tv[0].tv_sec = pair->src_st.st_atime; tv[0].tv_nsec = atime_nsec; tv[1].tv_sec = pair->src_st.st_mtime; tv[1].tv_nsec = mtime_nsec; (void)futimens(pair->dest_fd, tv); #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) // Use microsecond precision. struct timeval tv[2]; tv[0].tv_sec = pair->src_st.st_atime; tv[0].tv_usec = atime_nsec / 1000; tv[1].tv_sec = pair->src_st.st_mtime; tv[1].tv_usec = mtime_nsec / 1000; # if defined(HAVE_FUTIMES) (void)futimes(pair->dest_fd, tv); # elif defined(HAVE_FUTIMESAT) (void)futimesat(pair->dest_fd, NULL, tv); # else // Argh, no function to use a file descriptor to set the timestamp. (void)utimes(pair->dest_name, tv); # endif #elif defined(HAVE_UTIME) // Use one-second precision. utime() doesn't support using file // descriptor either. Some systems have broken utime() prototype // so don't make this const. 
struct utimbuf buf = { .actime = pair->src_st.st_atime, .modtime = pair->src_st.st_mtime, }; // Avoid warnings. (void)atime_nsec; (void)mtime_nsec; (void)utime(pair->dest_name, &buf); #endif return; } /// Opens the source file. Returns false on success, true on error. static bool io_open_src_real(file_pair *pair) { // There's nothing to open when reading from stdin. if (pair->src_name == stdin_filename) { pair->src_fd = STDIN_FILENO; #ifdef TUKLIB_DOSLIKE setmode(STDIN_FILENO, O_BINARY); #endif return false; } // Symlinks are not followed unless writing to stdout or --force // was used. const bool follow_symlinks = opt_stdout || opt_force; // We accept only regular files if we are writing the output // to disk too. bzip2 allows overriding this with --force but // gzip and xz don't. const bool reg_files_only = !opt_stdout; // Flags for open() int flags = O_RDONLY | O_BINARY | O_NOCTTY; #ifndef TUKLIB_DOSLIKE // If we accept only regular files, we need to be careful to avoid // problems with special files like devices and FIFOs. O_NONBLOCK // prevents blocking when opening such files. When we want to accept // special files, we must not use O_NONBLOCK, or otherwise we won't // block waiting e.g. FIFOs to become readable. if (reg_files_only) flags |= O_NONBLOCK; #endif #if defined(O_NOFOLLOW) if (!follow_symlinks) flags |= O_NOFOLLOW; #elif !defined(TUKLIB_DOSLIKE) // Some POSIX-like systems lack O_NOFOLLOW (it's not required // by POSIX). Check for symlinks with a separate lstat() on // these systems. if (!follow_symlinks) { struct stat st; if (lstat(pair->src_name, &st)) { message_error("%s: %s", pair->src_name, strerror(errno)); return true; } else if (S_ISLNK(st.st_mode)) { message_warning(_("%s: Is a symbolic link, " "skipping"), pair->src_name); return true; } } #else // Avoid warnings. (void)follow_symlinks; #endif // Try to open the file. If we are accepting non-regular files, // unblock the caught signals so that open() can be interrupted // if it blocks e.g. 
due to a FIFO file. if (!reg_files_only) signals_unblock(); // Maybe this wouldn't need a loop, since all the signal handlers for // which we don't use SA_RESTART set user_abort to true. But it // doesn't hurt to have it just in case. do { pair->src_fd = open(pair->src_name, flags); } while (pair->src_fd == -1 && errno == EINTR && !user_abort); if (!reg_files_only) signals_block(); if (pair->src_fd == -1) { // If we were interrupted, don't display any error message. if (errno == EINTR) { // All the signals that don't have SA_RESTART // set user_abort. assert(user_abort); return true; } #ifdef O_NOFOLLOW // Give an understandable error message if the reason // for failing was that the file was a symbolic link. // // Note that at least Linux, OpenBSD, Solaris, and Darwin // use ELOOP to indicate that O_NOFOLLOW was the reason // that open() failed. Because there may be // directories in the pathname, ELOOP may occur also // because of a symlink loop in the directory part. // So ELOOP doesn't tell us what actually went wrong, // and this stupidity went into POSIX-1.2008 too. // // FreeBSD associates EMLINK with O_NOFOLLOW and // Tru64 uses ENOTSUP. We use these directly here // and skip the lstat() call and the associated race. // I want to hear if there are other kernels that // fail with something else than ELOOP with O_NOFOLLOW. bool was_symlink = false; # if defined(__FreeBSD__) || defined(__DragonFly__) if (errno == EMLINK) was_symlink = true; # elif defined(__digital__) && defined(__unix__) if (errno == ENOTSUP) was_symlink = true; # elif defined(__NetBSD__) - // As of 2010-09-05, NetBSD doesn't document what errno is - // used with O_NOFOLLOW. It is EFTYPE though, and I - // understood that is very unlikely to change even though - // it is undocumented. 
if (errno == EFTYPE) was_symlink = true; # else if (errno == ELOOP && !follow_symlinks) { const int saved_errno = errno; struct stat st; if (lstat(pair->src_name, &st) == 0 && S_ISLNK(st.st_mode)) was_symlink = true; errno = saved_errno; } # endif if (was_symlink) message_warning(_("%s: Is a symbolic link, " "skipping"), pair->src_name); else #endif // Something else than O_NOFOLLOW failing // (assuming that the race conditions didn't // confuse us). message_error("%s: %s", pair->src_name, strerror(errno)); return true; } #ifndef TUKLIB_DOSLIKE // Drop O_NONBLOCK, which is used only when we are accepting only // regular files. After the open() call, we want things to block // instead of giving EAGAIN. if (reg_files_only) { flags = fcntl(pair->src_fd, F_GETFL); if (flags == -1) goto error_msg; flags &= ~O_NONBLOCK; - if (fcntl(pair->src_fd, F_SETFL, flags)) + if (fcntl(pair->src_fd, F_SETFL, flags) == -1) goto error_msg; } #endif // Stat the source file. We need the result also when we copy // the permissions, and when unlinking. if (fstat(pair->src_fd, &pair->src_st)) goto error_msg; if (S_ISDIR(pair->src_st.st_mode)) { message_warning(_("%s: Is a directory, skipping"), pair->src_name); goto error; } if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) { message_warning(_("%s: Not a regular file, skipping"), pair->src_name); goto error; } #ifndef TUKLIB_DOSLIKE if (reg_files_only && !opt_force) { if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { // gzip rejects setuid and setgid files even // when --force was used. bzip2 doesn't check // for them, but calls fchown() after fchmod(), // and many systems automatically drop setuid // and setgid bits there. // // We accept setuid and setgid files if // --force was used. We drop these bits // explicitly in io_copy_attr(). 
message_warning(_("%s: File has setuid or " "setgid bit set, skipping"), pair->src_name); goto error; } if (pair->src_st.st_mode & S_ISVTX) { message_warning(_("%s: File has sticky bit " "set, skipping"), pair->src_name); goto error; } if (pair->src_st.st_nlink > 1) { message_warning(_("%s: Input file has more " "than one hard link, " "skipping"), pair->src_name); goto error; } } #endif return false; error_msg: message_error("%s: %s", pair->src_name, strerror(errno)); error: (void)close(pair->src_fd); return true; } extern file_pair * io_open_src(const char *src_name) { if (is_empty_filename(src_name)) return NULL; // Since we have only one file open at a time, we can use // a statically allocated structure. static file_pair pair; pair = (file_pair){ .src_name = src_name, .dest_name = NULL, .src_fd = -1, .dest_fd = -1, .src_eof = false, .dest_try_sparse = false, .dest_pending_sparse = 0, }; // Block the signals, for which we have a custom signal handler, so // that we don't need to worry about EINTR. signals_block(); const bool error = io_open_src_real(&pair); signals_unblock(); return error ? NULL : &pair; } /// \brief Closes source file of the file_pair structure /// /// \param pair File whose src_fd should be closed /// \param success If true, the file will be removed from the disk if /// closing succeeds and --keep hasn't been used. static void io_close_src(file_pair *pair, bool success) { if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { #ifdef TUKLIB_DOSLIKE (void)close(pair->src_fd); #endif // If we are going to unlink(), do it before closing the file. // This way there's no risk that someone replaces the file and // happens to get same inode number, which would make us // unlink() wrong file. // // NOTE: DOS-like systems are an exception to this, because // they don't allow unlinking files that are open. 
*sigh* if (success && !opt_keep_original) io_unlink(pair->src_name, &pair->src_st); #ifndef TUKLIB_DOSLIKE (void)close(pair->src_fd); #endif } return; } static bool io_open_dest_real(file_pair *pair) { if (opt_stdout || pair->src_fd == STDIN_FILENO) { // We don't modify or free() this. pair->dest_name = (char *)"(stdout)"; pair->dest_fd = STDOUT_FILENO; #ifdef TUKLIB_DOSLIKE setmode(STDOUT_FILENO, O_BINARY); #endif } else { pair->dest_name = suffix_get_dest_name(pair->src_name); if (pair->dest_name == NULL) return true; // If --force was used, unlink the target file first. if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { message_error(_("%s: Cannot remove: %s"), pair->dest_name, strerror(errno)); free(pair->dest_name); return true; } // Open the file. const int flags = O_WRONLY | O_BINARY | O_NOCTTY | O_CREAT | O_EXCL; const mode_t mode = S_IRUSR | S_IWUSR; pair->dest_fd = open(pair->dest_name, flags, mode); if (pair->dest_fd == -1) { message_error("%s: %s", pair->dest_name, strerror(errno)); free(pair->dest_name); return true; } } // If this really fails... well, we have a safe fallback. if (fstat(pair->dest_fd, &pair->dest_st)) { #if defined(__VMS) pair->dest_st.st_ino[0] = 0; pair->dest_st.st_ino[1] = 0; pair->dest_st.st_ino[2] = 0; #elif !defined(TUKLIB_DOSLIKE) pair->dest_st.st_dev = 0; pair->dest_st.st_ino = 0; #endif #ifndef TUKLIB_DOSLIKE } else if (try_sparse && opt_mode == MODE_DECOMPRESS) { // When writing to standard output, we need to be extra // careful: // - It may be connected to something else than // a regular file. // - We aren't necessarily writing to a new empty file // or to the end of an existing file. // - O_APPEND may be active. // // TODO: I'm keeping this disabled for DOS-like systems // for now. FAT doesn't support sparse files, but NTFS // does, so maybe this should be enabled on Windows after // some testing. 
if (pair->dest_fd == STDOUT_FILENO) { if (!S_ISREG(pair->dest_st.st_mode)) return false; - const int flags = fcntl(STDOUT_FILENO, F_GETFL); - if (flags == -1) + stdout_flags = fcntl(STDOUT_FILENO, F_GETFL); + if (stdout_flags == -1) return false; - if (flags & O_APPEND) { + if (stdout_flags & O_APPEND) { // Creating a sparse file is not possible // when O_APPEND is active (it's used by // shell's >> redirection). As I understand // it, it is safe to temporarily disable // O_APPEND in xz, because if someone // happened to write to the same file at the // same time, results would be bad anyway // (users shouldn't assume that xz uses any // specific block size when writing data). // // The write position may be something else // than the end of the file, so we must fix // it to start writing at the end of the file // to imitate O_APPEND. if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) return false; if (fcntl(STDOUT_FILENO, F_SETFL, - stdout_flags & ~O_APPEND)) + stdout_flags & ~O_APPEND) + == -1) return false; - // Remember the flags so that io_close_dest() - // can restore them. - stdout_flags = flags; + // Disabling O_APPEND succeeded. Mark + // that the flags should be restored + // in io_close_dest(). + restore_stdout_flags = true; } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR) != pair->dest_st.st_size) { // Writing won't start exactly at the end // of the file. We cannot use sparse output, // because it would probably corrupt the file. return false; } } pair->dest_try_sparse = true; #endif } return false; } extern bool io_open_dest(file_pair *pair) { signals_block(); const bool ret = io_open_dest_real(pair); signals_unblock(); return ret; } /// \brief Closes destination file of the file_pair structure /// /// \param pair File whose dest_fd should be closed /// \param success If false, the file will be removed from the disk. /// /// \return Zero if closing succeeds. On error, -1 is returned and /// error message printed. 
static bool io_close_dest(file_pair *pair, bool success) { #ifndef TUKLIB_DOSLIKE // If io_open_dest() has disabled O_APPEND, restore it here. - if (stdout_flags != 0) { + if (restore_stdout_flags) { assert(pair->dest_fd == STDOUT_FILENO); - const int fail = fcntl(STDOUT_FILENO, F_SETFL, stdout_flags); - stdout_flags = 0; + restore_stdout_flags = false; - if (fail) { + if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) { message_error(_("Error restoring the O_APPEND flag " "to standard output: %s"), strerror(errno)); return true; } } #endif if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) return false; if (close(pair->dest_fd)) { message_error(_("%s: Closing the file failed: %s"), pair->dest_name, strerror(errno)); // Closing destination file failed, so we cannot trust its // contents. Get rid of junk: io_unlink(pair->dest_name, &pair->dest_st); free(pair->dest_name); return true; } // If the operation using this file wasn't successful, we git rid // of the junk file. if (!success) io_unlink(pair->dest_name, &pair->dest_st); free(pair->dest_name); return false; } extern void io_close(file_pair *pair, bool success) { // Take care of sparseness at the end of the output file. if (success && pair->dest_try_sparse && pair->dest_pending_sparse > 0) { // Seek forward one byte less than the size of the pending // hole, then write one zero-byte. This way the file grows // to its correct size. An alternative would be to use // ftruncate() but that isn't portable enough (e.g. it // doesn't work with FAT on Linux; FAT isn't that important // since it doesn't support sparse files anyway, but we don't // want to create corrupt files on it). 
if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1, SEEK_CUR) == -1) { message_error(_("%s: Seeking failed when trying " "to create a sparse file: %s"), pair->dest_name, strerror(errno)); success = false; } else { const uint8_t zero[1] = { '\0' }; if (io_write_buf(pair, zero, 1)) success = false; } } signals_block(); // Copy the file attributes. We need to skip this if destination // file isn't open or it is standard output. if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO) io_copy_attrs(pair); // Close the destination first. If it fails, we must not remove // the source file! if (io_close_dest(pair, success)) success = false; // Close the source file, and unlink it if the operation using this // file pair was successful and we haven't requested to keep the // source file. io_close_src(pair, success); signals_unblock(); return; } extern size_t io_read(file_pair *pair, io_buf *buf_union, size_t size) { // We use small buffers here. assert(size < SSIZE_MAX); uint8_t *buf = buf_union->u8; size_t left = size; while (left > 0) { const ssize_t amount = read(pair->src_fd, buf, left); if (amount == 0) { pair->src_eof = true; break; } if (amount == -1) { if (errno == EINTR) { if (user_abort) return SIZE_MAX; continue; } message_error(_("%s: Read error: %s"), pair->src_name, strerror(errno)); // FIXME Is this needed? pair->src_eof = true; return SIZE_MAX; } buf += (size_t)(amount); left -= (size_t)(amount); } return size - left; } extern bool io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos) { // Using lseek() and read() is more portable than pread() and // for us it is as good as real pread(). 
if (lseek(pair->src_fd, pos, SEEK_SET) != pos) { message_error(_("%s: Error seeking the file: %s"), pair->src_name, strerror(errno)); return true; } const size_t amount = io_read(pair, buf, size); if (amount == SIZE_MAX) return true; if (amount != size) { message_error(_("%s: Unexpected end of file"), pair->src_name); return true; } return false; } static bool is_sparse(const io_buf *buf) { assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0); for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i) if (buf->u64[i] != 0) return false; return true; } static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size) { assert(size < SSIZE_MAX); while (size > 0) { const ssize_t amount = write(pair->dest_fd, buf, size); if (amount == -1) { if (errno == EINTR) { if (user_abort) - return -1; + return true; continue; } // Handle broken pipe specially. gzip and bzip2 // don't print anything on SIGPIPE. In addition, // gzip --quiet uses exit status 2 (warning) on // broken pipe instead of whatever raise(SIGPIPE) // would make it return. It is there to hide "Broken // pipe" message on some old shells (probably old // GNU bash). // // We don't do anything special with --quiet, which // is what bzip2 does too. If we get SIGPIPE, we // will handle it like other signals by setting // user_abort, and get EPIPE here. if (errno != EPIPE) message_error(_("%s: Write error: %s"), pair->dest_name, strerror(errno)); return true; } buf += (size_t)(amount); size -= (size_t)(amount); } return false; } extern bool io_write(file_pair *pair, const io_buf *buf, size_t size) { assert(size <= IO_BUFFER_SIZE); if (pair->dest_try_sparse) { // Check if the block is sparse (contains only zeros). If it // sparse, we just store the amount and return. We will take // care of actually skipping over the hole when we hit the // next data block or close the file. 
// // Since io_close() requires that dest_pending_sparse > 0 // if the file ends with sparse block, we must also return // if size == 0 to avoid doing the lseek(). if (size == IO_BUFFER_SIZE) { if (is_sparse(buf)) { pair->dest_pending_sparse += size; return false; } } else if (size == 0) { return false; } // This is not a sparse block. If we have a pending hole, // skip it now. if (pair->dest_pending_sparse > 0) { if (lseek(pair->dest_fd, pair->dest_pending_sparse, SEEK_CUR) == -1) { message_error(_("%s: Seeking failed when " "trying to create a sparse " "file: %s"), pair->dest_name, strerror(errno)); return true; } pair->dest_pending_sparse = 0; } } return io_write_buf(pair, buf->u8, size); } diff --git a/src/xz/list.c b/src/xz/list.c index 98307eb247ef..0e73d519ea4a 100644 --- a/src/xz/list.c +++ b/src/xz/list.c @@ -1,1109 +1,1135 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file list.c /// \brief Listing information about .xz files // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. 
// /////////////////////////////////////////////////////////////////////////////// #include "private.h" #include "tuklib_integer.h" /// Information about a .xz file typedef struct { /// Combined Index of all Streams in the file lzma_index *idx; /// Total amount of Stream Padding uint64_t stream_padding; /// Highest memory usage so far uint64_t memusage_max; /// True if all Blocks so far have Compressed Size and /// Uncompressed Size fields bool all_have_sizes; } xz_file_info; #define XZ_FILE_INFO_INIT { NULL, 0, 0, true } /// Information about a .xz Block typedef struct { /// Size of the Block Header uint32_t header_size; /// A few of the Block Flags as a string char flags[3]; /// Size of the Compressed Data field in the Block lzma_vli compressed_size; /// Decoder memory usage for this Block uint64_t memusage; /// The filter chain of this Block in human-readable form char filter_chain[FILTERS_STR_SIZE]; } block_header_info; /// Check ID to string mapping static const char check_names[LZMA_CHECK_ID_MAX + 1][12] = { // TRANSLATORS: Indicates that there is no integrity check. // This string is used in tables, so the width must not // exceed ten columns with a fixed-width font. N_("None"), "CRC32", // TRANSLATORS: Indicates that integrity check name is not known, // but the Check ID is known (here 2). This and other "Unknown-N" // strings are used in tables, so the width must not exceed ten // columns with a fixed-width font. It's OK to omit the dash if // you need space for one extra letter, but don't use spaces. N_("Unknown-2"), N_("Unknown-3"), "CRC64", N_("Unknown-5"), N_("Unknown-6"), N_("Unknown-7"), N_("Unknown-8"), N_("Unknown-9"), "SHA-256", N_("Unknown-11"), N_("Unknown-12"), N_("Unknown-13"), N_("Unknown-14"), N_("Unknown-15"), }; /// Buffer size for get_check_names(). This may be a bit ridiculous, /// but at least it's enough if some language needs many multibyte chars. #define CHECKS_STR_SIZE 1024 /// Value of the Check field as hexadecimal string. 
/// This is set by parse_check_value(). static char check_value[2 * LZMA_CHECK_SIZE_MAX + 1]; /// Totals that are displayed if there was more than one file. /// The "files" counter is also used in print_info_adv() to show /// the file number. static struct { uint64_t files; uint64_t streams; uint64_t blocks; uint64_t compressed_size; uint64_t uncompressed_size; uint64_t stream_padding; uint64_t memusage_max; uint32_t checks; bool all_have_sizes; } totals = { 0, 0, 0, 0, 0, 0, 0, 0, true }; /// \brief Parse the Index(es) from the given .xz file /// /// \param xfi Pointer to structure where the decoded information /// is stored. /// \param pair Input file /// /// \return On success, false is returned. On error, true is returned. /// // TODO: This function is pretty big. liblzma should have a function that // takes a callback function to parse the Index(es) from a .xz file to make // it easy for applications. static bool parse_indexes(xz_file_info *xfi, file_pair *pair) { if (pair->src_st.st_size <= 0) { message_error(_("%s: File is empty"), pair->src_name); return true; } if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { message_error(_("%s: Too small to be a valid .xz file"), pair->src_name); return true; } io_buf buf; lzma_stream_flags header_flags; lzma_stream_flags footer_flags; lzma_ret ret; // lzma_stream for the Index decoder lzma_stream strm = LZMA_STREAM_INIT; // All Indexes decoded so far lzma_index *combined_index = NULL; // The Index currently being decoded lzma_index *this_index = NULL; // Current position in the file. We parse the file backwards so // initialize it to point to the end of the file. off_t pos = pair->src_st.st_size; // Each loop iteration decodes one Index. do { // Check that there is enough data left to contain at least // the Stream Header and Stream Footer. This check cannot // fail in the first pass of this loop. 
if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { message_error("%s: %s", pair->src_name, message_strm(LZMA_DATA_ERROR)); goto error; } pos -= LZMA_STREAM_HEADER_SIZE; lzma_vli stream_padding = 0; // Locate the Stream Footer. There may be Stream Padding which // we must skip when reading backwards. while (true) { if (pos < LZMA_STREAM_HEADER_SIZE) { message_error("%s: %s", pair->src_name, message_strm( LZMA_DATA_ERROR)); goto error; } if (io_pread(pair, &buf, LZMA_STREAM_HEADER_SIZE, pos)) goto error; // Stream Padding is always a multiple of four bytes. int i = 2; if (buf.u32[i] != 0) break; // To avoid calling io_pread() for every four bytes // of Stream Padding, take advantage that we read // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and // check them too before calling io_pread() again. do { stream_padding += 4; pos -= 4; --i; } while (i >= 0 && buf.u32[i] == 0); } // Decode the Stream Footer. ret = lzma_stream_footer_decode(&footer_flags, buf.u8); if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); goto error; } + // Check that the Stream Footer doesn't specify something + // that we don't support. This can only happen if the xz + // version is older than liblzma and liblzma supports + // something new. + // + // It is enough to check Stream Footer. Stream Header must + // match when it is compared against Stream Footer with + // lzma_stream_flags_compare(). + if (footer_flags.version != 0) { + message_error("%s: %s", pair->src_name, + message_strm(LZMA_OPTIONS_ERROR)); + goto error; + } + // Check that the size of the Index field looks sane. lzma_vli index_size = footer_flags.backward_size; if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { message_error("%s: %s", pair->src_name, message_strm(LZMA_DATA_ERROR)); goto error; } // Set pos to the beginning of the Index. pos -= index_size; // See how much memory we can use for decoding this Index. 
uint64_t memlimit = hardware_memlimit_get(MODE_LIST); uint64_t memused = 0; if (combined_index != NULL) { memused = lzma_index_memused(combined_index); if (memused > memlimit) message_bug(); memlimit -= memused; } // Decode the Index. ret = lzma_index_decoder(&strm, &this_index, memlimit); if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); goto error; } do { // Don't give the decoder more input than the // Index size. strm.avail_in = my_min(IO_BUFFER_SIZE, index_size); if (io_pread(pair, &buf, strm.avail_in, pos)) goto error; pos += strm.avail_in; index_size -= strm.avail_in; strm.next_in = buf.u8; ret = lzma_code(&strm, LZMA_RUN); } while (ret == LZMA_OK); // If the decoding seems to be successful, check also that // the Index decoder consumed as much input as indicated // by the Backward Size field. if (ret == LZMA_STREAM_END) if (index_size != 0 || strm.avail_in != 0) ret = LZMA_DATA_ERROR; if (ret != LZMA_STREAM_END) { // LZMA_BUFFER_ERROR means that the Index decoder // would have liked more input than what the Index // size should be according to Stream Footer. // The message for LZMA_DATA_ERROR makes more // sense in that case. if (ret == LZMA_BUF_ERROR) ret = LZMA_DATA_ERROR; message_error("%s: %s", pair->src_name, message_strm(ret)); // If the error was too low memory usage limit, // show also how much memory would have been needed. if (ret == LZMA_MEMLIMIT_ERROR) { uint64_t needed = lzma_memusage(&strm); if (UINT64_MAX - needed < memused) needed = UINT64_MAX; else needed += memused; message_mem_needed(V_ERROR, needed); } goto error; } // Decode the Stream Header and check that its Stream Flags // match the Stream Footer. 
pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { message_error("%s: %s", pair->src_name, message_strm(LZMA_DATA_ERROR)); goto error; } pos -= lzma_index_total_size(this_index); if (io_pread(pair, &buf, LZMA_STREAM_HEADER_SIZE, pos)) goto error; ret = lzma_stream_header_decode(&header_flags, buf.u8); if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); goto error; } ret = lzma_stream_flags_compare(&header_flags, &footer_flags); if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); goto error; } // Store the decoded Stream Flags into this_index. This is // needed so that we can print which Check is used in each // Stream. ret = lzma_index_stream_flags(this_index, &footer_flags); if (ret != LZMA_OK) message_bug(); // Store also the size of the Stream Padding field. It is // needed to show the offsets of the Streams correctly. ret = lzma_index_stream_padding(this_index, stream_padding); if (ret != LZMA_OK) message_bug(); if (combined_index != NULL) { // Append the earlier decoded Indexes // after this_index. ret = lzma_index_cat( this_index, combined_index, NULL); if (ret != LZMA_OK) { message_error("%s: %s", pair->src_name, message_strm(ret)); goto error; } } combined_index = this_index; this_index = NULL; xfi->stream_padding += stream_padding; } while (pos > 0); lzma_end(&strm); // All OK. Make combined_index available to the caller. xfi->idx = combined_index; return false; error: // Something went wrong, free the allocated memory. lzma_end(&strm); lzma_index_end(combined_index, NULL); lzma_index_end(this_index, NULL); return true; } /// \brief Parse the Block Header /// /// The result is stored into *bhi. The caller takes care of initializing it. /// /// \return False on success, true on error. 
static bool parse_block_header(file_pair *pair, const lzma_index_iter *iter, block_header_info *bhi, xz_file_info *xfi) { #if IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX # error IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX #endif // Get the whole Block Header with one read, but don't read past // the end of the Block (or even its Check field). const uint32_t size = my_min(iter->block.total_size - lzma_check_size(iter->stream.flags->check), LZMA_BLOCK_HEADER_SIZE_MAX); io_buf buf; if (io_pread(pair, &buf, size, iter->block.compressed_file_offset)) return true; // Zero would mean Index Indicator and thus not a valid Block. if (buf.u8[0] == 0) goto data_error; // Initialize the block structure and decode Block Header Size. lzma_filter filters[LZMA_FILTERS_MAX + 1]; lzma_block block; block.version = 0; block.check = iter->stream.flags->check; block.filters = filters; block.header_size = lzma_block_header_size_decode(buf.u8[0]); if (block.header_size > size) goto data_error; // Decode the Block Header. switch (lzma_block_header_decode(&block, NULL, buf.u8)) { case LZMA_OK: break; case LZMA_OPTIONS_ERROR: message_error("%s: %s", pair->src_name, message_strm(LZMA_OPTIONS_ERROR)); return true; case LZMA_DATA_ERROR: goto data_error; default: message_bug(); } // Check the Block Flags. These must be done before calling // lzma_block_compressed_size(), because it overwrites // block.compressed_size. bhi->flags[0] = block.compressed_size != LZMA_VLI_UNKNOWN ? 'c' : '-'; bhi->flags[1] = block.uncompressed_size != LZMA_VLI_UNKNOWN ? 'u' : '-'; bhi->flags[2] = '\0'; // Collect information if all Blocks have both Compressed Size // and Uncompressed Size fields. They can be useful e.g. for // multi-threaded decompression so it can be useful to know it. xfi->all_have_sizes &= block.compressed_size != LZMA_VLI_UNKNOWN && block.uncompressed_size != LZMA_VLI_UNKNOWN; // Validate or set block.compressed_size. 
switch (lzma_block_compressed_size(&block, iter->block.unpadded_size)) { case LZMA_OK: - break; + // Validate also block.uncompressed_size if it is present. + // If it isn't present, there's no need to set it since + // we aren't going to actually decompress the Block; if + // we were decompressing, then we should set it so that + // the Block decoder could validate the Uncompressed Size + // that was stored in the Index. + if (block.uncompressed_size == LZMA_VLI_UNKNOWN + || block.uncompressed_size + == iter->block.uncompressed_size) + break; + + // If the above fails, the file is corrupt so + // LZMA_DATA_ERROR is a good error code. case LZMA_DATA_ERROR: // Free the memory allocated by lzma_block_header_decode(). for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) free(filters[i].options); goto data_error; default: message_bug(); } // Copy the known sizes. bhi->header_size = block.header_size; bhi->compressed_size = block.compressed_size; // Calculate the decoder memory usage and update the maximum // memory usage of this Block. bhi->memusage = lzma_raw_decoder_memusage(filters); if (xfi->memusage_max < bhi->memusage) xfi->memusage_max = bhi->memusage; // Convert the filter chain to human readable form. message_filters_to_str(bhi->filter_chain, filters, false); // Free the memory allocated by lzma_block_header_decode(). for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) free(filters[i].options); return false; data_error: // Show the error message. message_error("%s: %s", pair->src_name, message_strm(LZMA_DATA_ERROR)); return true; } /// \brief Parse the Check field and put it into check_value[] /// /// \return False on success, true on error. static bool parse_check_value(file_pair *pair, const lzma_index_iter *iter) { // Don't read anything from the file if there is no integrity Check. if (iter->stream.flags->check == LZMA_CHECK_NONE) { snprintf(check_value, sizeof(check_value), "---"); return false; } // Locate and read the Check field. 
const uint32_t size = lzma_check_size(iter->stream.flags->check); const off_t offset = iter->block.compressed_file_offset + iter->block.total_size - size; io_buf buf; if (io_pread(pair, &buf, size, offset)) return true; // CRC32 and CRC64 are in little endian. Guess that all the future // 32-bit and 64-bit Check values are little endian too. It shouldn't // be a too big problem if this guess is wrong. if (size == 4) snprintf(check_value, sizeof(check_value), "%08" PRIx32, conv32le(buf.u32[0])); else if (size == 8) snprintf(check_value, sizeof(check_value), "%016" PRIx64, conv64le(buf.u64[0])); else for (size_t i = 0; i < size; ++i) snprintf(check_value + i * 2, 3, "%02x", buf.u8[i]); return false; } /// \brief Parse detailed information about a Block /// /// Since this requires seek(s), listing information about all Blocks can /// be slow. /// /// \param pair Input file /// \param iter Location of the Block whose Check value should /// be printed. /// \param bhi Pointer to structure where to store the information /// about the Block Header field. /// /// \return False on success, true on error. If an error occurs, /// the error message is printed too so the caller doesn't /// need to worry about that. static bool parse_details(file_pair *pair, const lzma_index_iter *iter, block_header_info *bhi, xz_file_info *xfi) { if (parse_block_header(pair, iter, bhi, xfi)) return true; if (parse_check_value(pair, iter)) return true; return false; } /// \brief Get the compression ratio /// /// This has slightly different format than that is used in message.c. 
static const char * get_ratio(uint64_t compressed_size, uint64_t uncompressed_size) { if (uncompressed_size == 0) return "---"; const double ratio = (double)(compressed_size) / (double)(uncompressed_size); if (ratio > 9.999) return "---"; static char buf[16]; snprintf(buf, sizeof(buf), "%.3f", ratio); return buf; } /// \brief Get a comma-separated list of Check names /// /// The check names are translated with gettext except when in robot mode. /// /// \param buf Buffer to hold the resulting string /// \param checks Bit mask of Checks to print /// \param space_after_comma /// It's better to not use spaces in table-like listings, /// but in more verbose formats a space after a comma /// is good for readability. static void get_check_names(char buf[CHECKS_STR_SIZE], uint32_t checks, bool space_after_comma) { assert(checks != 0); char *pos = buf; size_t left = CHECKS_STR_SIZE; const char *sep = space_after_comma ? ", " : ","; bool comma = false; for (size_t i = 0; i <= LZMA_CHECK_ID_MAX; ++i) { if (checks & (UINT32_C(1) << i)) { my_snprintf(&pos, &left, "%s%s", comma ? sep : "", opt_robot ? check_names[i] : _(check_names[i])); comma = true; } } return; } static bool print_info_basic(const xz_file_info *xfi, file_pair *pair) { static bool headings_displayed = false; if (!headings_displayed) { headings_displayed = true; // TRANSLATORS: These are column headings. From Strms (Streams) // to Ratio, the columns are right aligned. Check and Filename // are left aligned. If you need longer words, it's OK to // use two lines here. Test with "xz -l foo.xz". 
puts(_("Strms Blocks Compressed Uncompressed Ratio " "Check Filename")); } char checks[CHECKS_STR_SIZE]; get_check_names(checks, lzma_index_checks(xfi->idx), false); const char *cols[7] = { uint64_to_str(lzma_index_stream_count(xfi->idx), 0), uint64_to_str(lzma_index_block_count(xfi->idx), 1), uint64_to_nicestr(lzma_index_file_size(xfi->idx), NICESTR_B, NICESTR_TIB, false, 2), uint64_to_nicestr(lzma_index_uncompressed_size(xfi->idx), NICESTR_B, NICESTR_TIB, false, 3), get_ratio(lzma_index_file_size(xfi->idx), lzma_index_uncompressed_size(xfi->idx)), checks, pair->src_name, }; printf("%*s %*s %*s %*s %*s %-*s %s\n", tuklib_mbstr_fw(cols[0], 5), cols[0], tuklib_mbstr_fw(cols[1], 7), cols[1], tuklib_mbstr_fw(cols[2], 11), cols[2], tuklib_mbstr_fw(cols[3], 11), cols[3], tuklib_mbstr_fw(cols[4], 5), cols[4], tuklib_mbstr_fw(cols[5], 7), cols[5], cols[6]); return false; } static void print_adv_helper(uint64_t stream_count, uint64_t block_count, uint64_t compressed_size, uint64_t uncompressed_size, uint32_t checks, uint64_t stream_padding) { char checks_str[CHECKS_STR_SIZE]; get_check_names(checks_str, checks, true); printf(_(" Streams: %s\n"), uint64_to_str(stream_count, 0)); printf(_(" Blocks: %s\n"), uint64_to_str(block_count, 0)); printf(_(" Compressed size: %s\n"), uint64_to_nicestr(compressed_size, NICESTR_B, NICESTR_TIB, true, 0)); printf(_(" Uncompressed size: %s\n"), uint64_to_nicestr(uncompressed_size, NICESTR_B, NICESTR_TIB, true, 0)); printf(_(" Ratio: %s\n"), get_ratio(compressed_size, uncompressed_size)); printf(_(" Check: %s\n"), checks_str); printf(_(" Stream padding: %s\n"), uint64_to_nicestr(stream_padding, NICESTR_B, NICESTR_TIB, true, 0)); return; } static bool print_info_adv(xz_file_info *xfi, file_pair *pair) { // Print the overall information. 
print_adv_helper(lzma_index_stream_count(xfi->idx), lzma_index_block_count(xfi->idx), lzma_index_file_size(xfi->idx), lzma_index_uncompressed_size(xfi->idx), lzma_index_checks(xfi->idx), xfi->stream_padding); // Size of the biggest Check. This is used to calculate the width // of the CheckVal field. The table would get insanely wide if // we always reserved space for 64-byte Check (128 chars as hex). uint32_t check_max = 0; // Print information about the Streams. // // TRANSLATORS: The second line is column headings. All except // Check are right aligned; Check is left aligned. Test with // "xz -lv foo.xz". puts(_(" Streams:\n Stream Blocks" " CompOffset UncompOffset" " CompSize UncompSize Ratio" " Check Padding")); lzma_index_iter iter; lzma_index_iter_init(&iter, xfi->idx); while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) { const char *cols1[4] = { uint64_to_str(iter.stream.number, 0), uint64_to_str(iter.stream.block_count, 1), uint64_to_str(iter.stream.compressed_offset, 2), uint64_to_str(iter.stream.uncompressed_offset, 3), }; printf(" %*s %*s %*s %*s ", tuklib_mbstr_fw(cols1[0], 6), cols1[0], tuklib_mbstr_fw(cols1[1], 9), cols1[1], tuklib_mbstr_fw(cols1[2], 15), cols1[2], tuklib_mbstr_fw(cols1[3], 15), cols1[3]); const char *cols2[5] = { uint64_to_str(iter.stream.compressed_size, 0), uint64_to_str(iter.stream.uncompressed_size, 1), get_ratio(iter.stream.compressed_size, iter.stream.uncompressed_size), _(check_names[iter.stream.flags->check]), uint64_to_str(iter.stream.padding, 2), }; printf("%*s %*s %*s %-*s %*s\n", tuklib_mbstr_fw(cols2[0], 15), cols2[0], tuklib_mbstr_fw(cols2[1], 15), cols2[1], tuklib_mbstr_fw(cols2[2], 5), cols2[2], tuklib_mbstr_fw(cols2[3], 10), cols2[3], tuklib_mbstr_fw(cols2[4], 7), cols2[4]); // Update the maximum Check size. if (lzma_check_size(iter.stream.flags->check) > check_max) check_max = lzma_check_size(iter.stream.flags->check); } // Cache the verbosity level to a local variable. 
const bool detailed = message_verbosity_get() >= V_DEBUG; // Information collected from Block Headers block_header_info bhi; // Print information about the Blocks but only if there is // at least one Block. if (lzma_index_block_count(xfi->idx) > 0) { // Calculate the width of the CheckVal field. const int checkval_width = my_max(8, 2 * check_max); // TRANSLATORS: The second line is column headings. All // except Check are right aligned; Check is left aligned. printf(_(" Blocks:\n Stream Block" " CompOffset UncompOffset" " TotalSize UncompSize Ratio Check")); if (detailed) { // TRANSLATORS: These are additional column headings // for the most verbose listing mode. CheckVal // (Check value), Flags, and Filters are left aligned. // Header (Block Header Size), CompSize, and MemUsage // are right aligned. %*s is replaced with 0-120 // spaces to make the CheckVal column wide enough. // Test with "xz -lvv foo.xz". printf(_(" CheckVal %*s Header Flags " "CompSize MemUsage Filters"), checkval_width - 8, ""); } putchar('\n'); lzma_index_iter_init(&iter, xfi->idx); // Iterate over the Blocks. 
while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { if (detailed && parse_details(pair, &iter, &bhi, xfi)) return true; const char *cols1[4] = { uint64_to_str(iter.stream.number, 0), uint64_to_str( iter.block.number_in_stream, 1), uint64_to_str( iter.block.compressed_file_offset, 2), uint64_to_str( iter.block.uncompressed_file_offset, 3) }; printf(" %*s %*s %*s %*s ", tuklib_mbstr_fw(cols1[0], 6), cols1[0], tuklib_mbstr_fw(cols1[1], 9), cols1[1], tuklib_mbstr_fw(cols1[2], 15), cols1[2], tuklib_mbstr_fw(cols1[3], 15), cols1[3]); const char *cols2[4] = { uint64_to_str(iter.block.total_size, 0), uint64_to_str(iter.block.uncompressed_size, 1), get_ratio(iter.block.total_size, iter.block.uncompressed_size), _(check_names[iter.stream.flags->check]) }; printf("%*s %*s %*s %-*s", tuklib_mbstr_fw(cols2[0], 15), cols2[0], tuklib_mbstr_fw(cols2[1], 15), cols2[1], tuklib_mbstr_fw(cols2[2], 5), cols2[2], tuklib_mbstr_fw(cols2[3], detailed ? 11 : 1), cols2[3]); if (detailed) { const lzma_vli compressed_size = iter.block.unpadded_size - bhi.header_size - lzma_check_size( iter.stream.flags->check); const char *cols3[6] = { check_value, uint64_to_str(bhi.header_size, 0), bhi.flags, uint64_to_str(compressed_size, 1), uint64_to_str( round_up_to_mib(bhi.memusage), 2), bhi.filter_chain }; // Show MiB for memory usage, because it // is the only size which is not in bytes. printf("%-*s %*s %-5s %*s %*s MiB %s", checkval_width, cols3[0], tuklib_mbstr_fw(cols3[1], 6), cols3[1], cols3[2], tuklib_mbstr_fw(cols3[3], 15), cols3[3], tuklib_mbstr_fw(cols3[4], 7), cols3[4], cols3[5]); } putchar('\n'); } } if (detailed) { printf(_(" Memory needed: %s MiB\n"), uint64_to_str( round_up_to_mib(xfi->memusage_max), 0)); printf(_(" Sizes in headers: %s\n"), xfi->all_have_sizes ? 
_("Yes") : _("No")); } return false; } static bool print_info_robot(xz_file_info *xfi, file_pair *pair) { char checks[CHECKS_STR_SIZE]; get_check_names(checks, lzma_index_checks(xfi->idx), false); printf("name\t%s\n", pair->src_name); printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s\t%" PRIu64 "\n", lzma_index_stream_count(xfi->idx), lzma_index_block_count(xfi->idx), lzma_index_file_size(xfi->idx), lzma_index_uncompressed_size(xfi->idx), get_ratio(lzma_index_file_size(xfi->idx), lzma_index_uncompressed_size(xfi->idx)), checks, xfi->stream_padding); if (message_verbosity_get() >= V_VERBOSE) { lzma_index_iter iter; lzma_index_iter_init(&iter, xfi->idx); while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) printf("stream\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s\t%" PRIu64 "\n", iter.stream.number, iter.stream.block_count, iter.stream.compressed_offset, iter.stream.uncompressed_offset, iter.stream.compressed_size, iter.stream.uncompressed_size, get_ratio(iter.stream.compressed_size, iter.stream.uncompressed_size), check_names[iter.stream.flags->check], iter.stream.padding); lzma_index_iter_rewind(&iter); block_header_info bhi; while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { if (message_verbosity_get() >= V_DEBUG && parse_details( pair, &iter, &bhi, xfi)) return true; printf("block\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s", iter.stream.number, iter.block.number_in_stream, iter.block.number_in_file, iter.block.compressed_file_offset, iter.block.uncompressed_file_offset, iter.block.total_size, iter.block.uncompressed_size, get_ratio(iter.block.total_size, iter.block.uncompressed_size), check_names[iter.stream.flags->check]); if (message_verbosity_get() >= V_DEBUG) printf("\t%s\t%" PRIu32 "\t%s\t%" PRIu64 "\t%" PRIu64 "\t%s", check_value, bhi.header_size, bhi.flags, bhi.compressed_size, bhi.memusage, bhi.filter_chain); 
putchar('\n'); } } if (message_verbosity_get() >= V_DEBUG) printf("summary\t%" PRIu64 "\t%s\n", xfi->memusage_max, xfi->all_have_sizes ? "yes" : "no"); return false; } static void update_totals(const xz_file_info *xfi) { // TODO: Integer overflow checks ++totals.files; totals.streams += lzma_index_stream_count(xfi->idx); totals.blocks += lzma_index_block_count(xfi->idx); totals.compressed_size += lzma_index_file_size(xfi->idx); totals.uncompressed_size += lzma_index_uncompressed_size(xfi->idx); totals.stream_padding += xfi->stream_padding; totals.checks |= lzma_index_checks(xfi->idx); if (totals.memusage_max < xfi->memusage_max) totals.memusage_max = xfi->memusage_max; totals.all_have_sizes &= xfi->all_have_sizes; return; } static void print_totals_basic(void) { // Print a separator line. char line[80]; memset(line, '-', sizeof(line)); line[sizeof(line) - 1] = '\0'; puts(line); // Get the check names. char checks[CHECKS_STR_SIZE]; get_check_names(checks, totals.checks, false); // Print the totals except the file count, which needs // special handling. printf("%5s %7s %11s %11s %5s %-7s ", uint64_to_str(totals.streams, 0), uint64_to_str(totals.blocks, 1), uint64_to_nicestr(totals.compressed_size, NICESTR_B, NICESTR_TIB, false, 2), uint64_to_nicestr(totals.uncompressed_size, NICESTR_B, NICESTR_TIB, false, 3), get_ratio(totals.compressed_size, totals.uncompressed_size), checks); // Since we print totals only when there are at least two files, // the English message will always use "%s files". But some other // languages need different forms for different plurals so we // have to translate this with ngettext(). // // TRANSLATORS: %s is an integer. Only the plural form of this // message is used (e.g. "2 files"). Test with "xz -l foo.xz bar.xz". printf(ngettext("%s file\n", "%s files\n", totals.files <= ULONG_MAX ? 
totals.files : (totals.files % 1000000) + 1000000), uint64_to_str(totals.files, 0)); return; } static void print_totals_adv(void) { putchar('\n'); puts(_("Totals:")); printf(_(" Number of files: %s\n"), uint64_to_str(totals.files, 0)); print_adv_helper(totals.streams, totals.blocks, totals.compressed_size, totals.uncompressed_size, totals.checks, totals.stream_padding); if (message_verbosity_get() >= V_DEBUG) { printf(_(" Memory needed: %s MiB\n"), uint64_to_str( round_up_to_mib(totals.memusage_max), 0)); printf(_(" Sizes in headers: %s\n"), totals.all_have_sizes ? _("Yes") : _("No")); } return; } static void print_totals_robot(void) { char checks[CHECKS_STR_SIZE]; get_check_names(checks, totals.checks, false); printf("totals\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s\t%" PRIu64 "\t%" PRIu64, totals.streams, totals.blocks, totals.compressed_size, totals.uncompressed_size, get_ratio(totals.compressed_size, totals.uncompressed_size), checks, totals.stream_padding, totals.files); if (message_verbosity_get() >= V_DEBUG) printf("\t%" PRIu64 "\t%s", totals.memusage_max, totals.all_have_sizes ? "yes" : "no"); putchar('\n'); return; } extern void list_totals(void) { if (opt_robot) { // Always print totals in --robot mode. It can be convenient // in some cases and doesn't complicate usage of the // single-file case much. print_totals_robot(); } else if (totals.files > 1) { // For non-robot mode, totals are printed only if there // is more than one file. if (message_verbosity_get() <= V_WARNING) print_totals_basic(); else print_totals_adv(); } return; } extern void list_file(const char *filename) { if (opt_format != FORMAT_XZ && opt_format != FORMAT_AUTO) message_fatal(_("--list works only on .xz files " "(--format=xz or --format=auto)")); message_filename(filename); if (filename == stdin_filename) { message_error(_("--list does not support reading from " "standard input")); return; } // Unset opt_stdout so that io_open_src() won't accept special files. 
// Set opt_force so that io_open_src() will follow symlinks. opt_stdout = false; opt_force = true; file_pair *pair = io_open_src(filename); if (pair == NULL) return; xz_file_info xfi = XZ_FILE_INFO_INIT; if (!parse_indexes(&xfi, pair)) { bool fail; // We have three main modes: // - --robot, which has submodes if --verbose is specified // once or twice // - Normal --list without --verbose // - --list with one or two --verbose if (opt_robot) fail = print_info_robot(&xfi, pair); else if (message_verbosity_get() <= V_WARNING) fail = print_info_basic(&xfi, pair); else fail = print_info_adv(&xfi, pair); // Update the totals that are displayed after all // the individual files have been listed. Don't count // broken files. if (!fail) update_totals(&xfi); lzma_index_end(xfi.idx, NULL); } io_close(pair, false); return; } diff --git a/src/xz/util.c b/src/xz/util.c index 987b44302539..35850f4c9046 100644 --- a/src/xz/util.c +++ b/src/xz/util.c @@ -1,278 +1,288 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file util.c /// \brief Miscellaneous utility functions // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "private.h" #include /// Buffers for uint64_to_str() and uint64_to_nicestr() static char bufs[4][128]; /// Thousand separator support in uint64_to_str() and uint64_to_nicestr() static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN; extern void * xrealloc(void *ptr, size_t size) { assert(size > 0); + // Save ptr so that we can free it if realloc fails. + // The point is that message_fatal ends up calling stdio functions + // which in some libc implementations might allocate memory from + // the heap. Freeing ptr improves the chances that there's free + // memory for stdio functions if they need it. 
+ void *p = ptr; ptr = realloc(ptr, size); - if (ptr == NULL) - message_fatal("%s", strerror(errno)); + + if (ptr == NULL) { + const int saved_errno = errno; + free(p); + message_fatal("%s", strerror(saved_errno)); + } return ptr; } extern char * xstrdup(const char *src) { assert(src != NULL); const size_t size = strlen(src) + 1; char *dest = xmalloc(size); return memcpy(dest, src, size); } extern uint64_t str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max) { uint64_t result = 0; // Skip blanks. while (*value == ' ' || *value == '\t') ++value; // Accept special value "max". Supporting "min" doesn't seem useful. if (strcmp(value, "max") == 0) return max; if (*value < '0' || *value > '9') message_fatal(_("%s: Value is not a non-negative " "decimal integer"), value); do { // Don't overflow. if (result > UINT64_MAX / 10) goto error; result *= 10; // Another overflow check const uint32_t add = *value - '0'; if (UINT64_MAX - add < result) goto error; result += add; ++value; } while (*value >= '0' && *value <= '9'); if (*value != '\0') { // Look for suffix. Originally this supported both base-2 // and base-10, but since there seems to be little need // for base-10 in this program, treat everything as base-2 // and also be more relaxed about the case of the first // letter of the suffix. uint64_t multiplier = 0; if (*value == 'k' || *value == 'K') multiplier = UINT64_C(1) << 10; else if (*value == 'm' || *value == 'M') multiplier = UINT64_C(1) << 20; else if (*value == 'g' || *value == 'G') multiplier = UINT64_C(1) << 30; ++value; // Allow also e.g. Ki, KiB, and KB. if (*value != '\0' && strcmp(value, "i") != 0 && strcmp(value, "iB") != 0 && strcmp(value, "B") != 0) multiplier = 0; if (multiplier == 0) { message(V_ERROR, _("%s: Invalid multiplier suffix"), value - 1); message_fatal(_("Valid suffixes are `KiB' (2^10), " "`MiB' (2^20), and `GiB' (2^30).")); } // Don't overflow here either. 
if (result > UINT64_MAX / multiplier) goto error; result *= multiplier; } if (result < min || result > max) goto error; return result; error: message_fatal(_("Value of the option `%s' must be in the range " "[%" PRIu64 ", %" PRIu64 "]"), name, min, max); } extern uint64_t round_up_to_mib(uint64_t n) { return (n >> 20) + ((n & ((UINT32_C(1) << 20) - 1)) != 0); } /// Check if thousand separator is supported. Run-time checking is easiest, /// because it seems to be sometimes lacking even on POSIXish system. static void check_thousand_sep(uint32_t slot) { if (thousand == UNKNOWN) { bufs[slot][0] = '\0'; snprintf(bufs[slot], sizeof(bufs[slot]), "%'u", 1U); thousand = bufs[slot][0] == '1' ? WORKS : BROKEN; } return; } extern const char * uint64_to_str(uint64_t value, uint32_t slot) { assert(slot < ARRAY_SIZE(bufs)); check_thousand_sep(slot); if (thousand == WORKS) snprintf(bufs[slot], sizeof(bufs[slot]), "%'" PRIu64, value); else snprintf(bufs[slot], sizeof(bufs[slot]), "%" PRIu64, value); return bufs[slot]; } extern const char * uint64_to_nicestr(uint64_t value, enum nicestr_unit unit_min, enum nicestr_unit unit_max, bool always_also_bytes, uint32_t slot) { assert(unit_min <= unit_max); assert(unit_max <= NICESTR_TIB); assert(slot < ARRAY_SIZE(bufs)); check_thousand_sep(slot); enum nicestr_unit unit = NICESTR_B; char *pos = bufs[slot]; size_t left = sizeof(bufs[slot]); if ((unit_min == NICESTR_B && value < 10000) || unit_max == NICESTR_B) { // The value is shown as bytes. if (thousand == WORKS) my_snprintf(&pos, &left, "%'u", (unsigned int)value); else my_snprintf(&pos, &left, "%u", (unsigned int)value); } else { // Scale the value to a nicer unit. Unless unit_min and // unit_max limit us, we will show at most five significant // digits with one decimal place. 
double d = (double)(value); do { d /= 1024.0; ++unit; } while (unit < unit_min || (d > 9999.9 && unit < unit_max)); if (thousand == WORKS) my_snprintf(&pos, &left, "%'.1f", d); else my_snprintf(&pos, &left, "%.1f", d); } static const char suffix[5][4] = { "B", "KiB", "MiB", "GiB", "TiB" }; my_snprintf(&pos, &left, " %s", suffix[unit]); if (always_also_bytes && value >= 10000) { if (thousand == WORKS) snprintf(pos, left, " (%'" PRIu64 " B)", value); else snprintf(pos, left, " (%" PRIu64 " B)", value); } return bufs[slot]; } extern void my_snprintf(char **pos, size_t *left, const char *fmt, ...) { va_list ap; va_start(ap, fmt); const int len = vsnprintf(*pos, *left, fmt, ap); va_end(ap); // If an error occurred, we want the caller to think that the whole // buffer was used. This way no more data will be written to the // buffer. We don't need better error handling here, although it // is possible that the result looks garbage on the terminal if // e.g. an UTF-8 character gets split. That shouldn't (easily) // happen though, because the buffers used have some extra room. if (len < 0 || (size_t)(len) >= *left) { *left = 0; } else { *pos += len; *left -= len; } return; } extern bool is_empty_filename(const char *filename) { if (filename[0] == '\0') { message_error(_("Empty filename, skipping")); return true; } return false; } extern bool is_tty_stdin(void) { const bool ret = isatty(STDIN_FILENO); if (ret) message_error(_("Compressed data cannot be read from " "a terminal")); return ret; } extern bool is_tty_stdout(void) { const bool ret = isatty(STDOUT_FILENO); if (ret) message_error(_("Compressed data cannot be written to " "a terminal")); return ret; } diff --git a/src/xz/xz.1 b/src/xz/xz.1 index 8edc456c241f..363b90cb4e55 100644 --- a/src/xz/xz.1 +++ b/src/xz/xz.1 @@ -1,2586 +1,2599 @@ '\" t .\" .\" Author: Lasse Collin .\" .\" This file has been put into the public domain. .\" You can do whatever you want with this file. 
.\" -.TH XZ 1 "2012-05-27" "Tukaani" "XZ Utils" +.TH XZ 1 "2013-06-21" "Tukaani" "XZ Utils" . .SH NAME xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files . .SH SYNOPSIS .B xz -.RI [ option ]... -.RI [ file ]... -.PP +.RI [ option... ] +.RI [ file... ] +. +.SH COMMAND ALIASES .B unxz is equivalent to .BR "xz \-\-decompress" . .br .B xzcat is equivalent to .BR "xz \-\-decompress \-\-stdout" . .br .B lzma is equivalent to .BR "xz \-\-format=lzma" . .br .B unlzma is equivalent to .BR "xz \-\-format=lzma \-\-decompress" . .br .B lzcat is equivalent to .BR "xz \-\-format=lzma \-\-decompress \-\-stdout" . .PP When writing scripts that need to decompress files, it is recommended to always use the name .B xz with appropriate arguments .RB ( "xz \-d" or .BR "xz \-dc" ) instead of the names .B unxz and .BR xzcat . . .SH DESCRIPTION .B xz is a general-purpose data compression tool with command line syntax similar to .BR gzip (1) and .BR bzip2 (1). The native file format is the .B .xz format, but the legacy .B .lzma format used by LZMA Utils and raw compressed streams with no container format headers are also supported. .PP .B xz compresses or decompresses each .I file according to the selected operation mode. If no .I files are given or .I file is .BR \- , .B xz reads from standard input and writes the processed data to standard output. .B xz will refuse (display an error and skip the .IR file ) to write compressed data to standard output if it is a terminal. Similarly, .B xz will refuse to read compressed data from standard input if it is a terminal. .PP Unless .B \-\-stdout is specified, .I files other than .B \- are written to a new file whose name is derived from the source .I file name: .IP \(bu 3 When compressing, the suffix of the target file format .RB ( .xz or .BR .lzma ) is appended to the source filename to get the target filename. 
.IP \(bu 3 When decompressing, the .B .xz or .B .lzma suffix is removed from the filename to get the target filename. .B xz also recognizes the suffixes .B .txz and .BR .tlz , and replaces them with the .B .tar suffix. .PP If the target file already exists, an error is displayed and the .I file is skipped. .PP Unless writing to standard output, .B xz will display a warning and skip the .I file if any of the following applies: .IP \(bu 3 .I File is not a regular file. Symbolic links are not followed, and thus they are not considered to be regular files. .IP \(bu 3 .I File has more than one hard link. .IP \(bu 3 .I File has setuid, setgid, or sticky bit set. .IP \(bu 3 The operation mode is set to compress and the .I file already has a suffix of the target file format .RB ( .xz or .B .txz when compressing to the .B .xz format, and .B .lzma or .B .tlz when compressing to the .B .lzma format). .IP \(bu 3 The operation mode is set to decompress and the .I file doesn't have a suffix of any of the supported file formats .RB ( .xz , .BR .txz , .BR .lzma , or .BR .tlz ). .PP After successfully compressing or decompressing the .IR file , .B xz copies the owner, group, permissions, access time, and modification time from the source .I file to the target file. If copying the group fails, the permissions are modified so that the target file doesn't become accessible to users who didn't have permission to access the source .IR file . .B xz doesn't support copying other metadata like access control lists or extended attributes yet. .PP Once the target file has been successfully closed, the source .I file is removed unless .B \-\-keep was specified. The source .I file is never removed if the output is written to standard output. .PP Sending .B SIGINFO or .B SIGUSR1 to the .B xz process makes it print progress information to standard error. 
This has only limited use since when standard error is a terminal, using .B \-\-verbose will display an automatically updating progress indicator. . .SS "Memory usage" The memory usage of .B xz varies from a few hundred kilobytes to several gigabytes depending on the compression settings. The settings used when compressing a file determine the memory requirements of the decompressor. Typically the decompressor needs 5\ % to 20\ % of the amount of memory that the compressor needed when creating the file. For example, decompressing a file created with .B xz \-9 currently requires 65\ MiB of memory. Still, it is possible to have .B .xz files that require several gigabytes of memory to decompress. .PP Especially users of older systems may find the possibility of very large memory usage annoying. To prevent uncomfortable surprises, .B xz has a built-in memory usage limiter, which is disabled by default. While some operating systems provide ways to limit the memory usage of processes, relying on it wasn't deemed to be flexible enough (e.g. using .BR ulimit (1) to limit virtual memory tends to cripple .BR mmap (2)). .PP The memory usage limiter can be enabled with the command line option \fB\-\-memlimit=\fIlimit\fR. Often it is more convenient to enable the limiter by default by setting the environment variable .BR XZ_DEFAULTS , e.g.\& .BR XZ_DEFAULTS=\-\-memlimit=150MiB . It is possible to set the limits separately for compression and decompression by using \fB\-\-memlimit\-compress=\fIlimit\fR and \fB\-\-memlimit\-decompress=\fIlimit\fR. Using these two options outside .B XZ_DEFAULTS is rarely useful because a single run of .B xz cannot do both compression and decompression and .BI \-\-memlimit= limit (or \fB\-M\fR \fIlimit\fR) is shorter to type on the command line. .PP If the specified memory usage limit is exceeded when decompressing, .B xz will display an error and decompressing the file will fail. 
If the limit is exceeded when compressing, .B xz will try to scale the settings down so that the limit is no longer exceeded (except when using \fB\-\-format=raw\fR or \fB\-\-no\-adjust\fR). This way the operation won't fail unless the limit is very small. The scaling of the settings is done in steps that don't match the compression level presets, e.g. if the limit is only slightly less than the amount required for .BR "xz \-9" , the settings will be scaled down only a little, not all the way down to .BR "xz \-8" . . .SS "Concatenation and padding with .xz files" It is possible to concatenate .B .xz files as is. .B xz will decompress such files as if they were a single .B .xz file. .PP It is possible to insert padding between the concatenated parts or after the last part. The padding must consist of null bytes and the size of the padding must be a multiple of four bytes. This can be useful e.g. if the .B .xz file is stored on a medium that measures file sizes in 512-byte blocks. .PP Concatenation and padding are not allowed with .B .lzma files or raw streams. . .SH OPTIONS . .SS "Integer suffixes and special values" In most places where an integer argument is expected, an optional suffix is supported to easily indicate large integers. There must be no space between the integer and the suffix. .TP .B KiB Multiply the integer by 1,024 (2^10). .BR Ki , .BR k , .BR kB , .BR K , and .B KB are accepted as synonyms for .BR KiB . .TP .B MiB Multiply the integer by 1,048,576 (2^20). .BR Mi , .BR m , .BR M , and .B MB are accepted as synonyms for .BR MiB . .TP .B GiB Multiply the integer by 1,073,741,824 (2^30). .BR Gi , .BR g , .BR G , and .B GB are accepted as synonyms for .BR GiB . .PP The special value .B max can be used to indicate the maximum integer value supported by the option. . .SS "Operation mode" If multiple operation mode options are given, the last one takes effect. .TP .BR \-z ", " \-\-compress Compress. 
This is the default operation mode when no operation mode option is specified and no other operation mode is implied from the command name (for example, .B unxz implies .BR \-\-decompress ). .TP .BR \-d ", " \-\-decompress ", " \-\-uncompress Decompress. .TP .BR \-t ", " \-\-test Test the integrity of compressed .IR files . This option is equivalent to .B "\-\-decompress \-\-stdout" except that the decompressed data is discarded instead of being written to standard output. No files are created or removed. .TP .BR \-l ", " \-\-list Print information about compressed .IR files . No uncompressed output is produced, and no files are created or removed. In list mode, the program cannot read the compressed data from standard input or from other unseekable sources. .IP "" The default listing shows basic information about .IR files , one file per line. To get more detailed information, use also the .B \-\-verbose option. For even more information, use .B \-\-verbose twice, but note that this may be slow, because getting all the extra information requires many seeks. The width of verbose output exceeds 80 characters, so piping the output to e.g.\& .B "less\ \-S" may be convenient if the terminal isn't wide enough. .IP "" The exact output may vary between .B xz versions and different locales. For machine-readable output, .B \-\-robot \-\-list should be used. . .SS "Operation modifiers" .TP .BR \-k ", " \-\-keep Don't delete the input files. .TP .BR \-f ", " \-\-force This option has several effects: .RS .IP \(bu 3 If the target file already exists, delete it before compressing or decompressing. .IP \(bu 3 Compress or decompress even if the input is a symbolic link to a regular file, has more than one hard link, or has the setuid, setgid, or sticky bit set. The setuid, setgid, and sticky bits are not copied to the target file. 
.IP \(bu 3 When used with .B \-\-decompress .BR \-\-stdout and .B xz cannot recognize the type of the source file, copy the source file as is to standard output. This allows .B xzcat .B \-\-force to be used like .BR cat (1) for files that have not been compressed with .BR xz . Note that in future, .B xz might support new compressed file formats, which may make .B xz decompress more types of files instead of copying them as is to standard output. .BI \-\-format= format can be used to restrict .B xz to decompress only a single file format. .RE .TP .BR \-c ", " \-\-stdout ", " \-\-to\-stdout Write the compressed or decompressed data to standard output instead of a file. This implies .BR \-\-keep . .TP .B \-\-no\-sparse Disable creation of sparse files. By default, if decompressing into a regular file, .B xz tries to make the file sparse if the decompressed data contains long sequences of binary zeros. It also works when writing to standard output as long as standard output is connected to a regular file and certain additional conditions are met to make it safe. Creating sparse files may save disk space and speed up the decompression by reducing the amount of disk I/O. .TP \fB\-S\fR \fI.suf\fR, \fB\-\-suffix=\fI.suf When compressing, use .I .suf as the suffix for the target file instead of .B .xz or .BR .lzma . If not writing to standard output and the source file already has the suffix .IR .suf , a warning is displayed and the file is skipped. .IP "" When decompressing, recognize files with the suffix .I .suf in addition to files with the .BR .xz , .BR .txz , .BR .lzma , or .B .tlz suffix. If the source file has the suffix .IR .suf , the suffix is removed to get the target filename. .IP "" When compressing or decompressing raw streams .RB ( \-\-format=raw ), the suffix must always be specified unless writing to standard output, because there is no default suffix for raw streams. 
.TP \fB\-\-files\fR[\fB=\fIfile\fR] Read the filenames to process from .IR file ; if .I file is omitted, filenames are read from standard input. Filenames must be terminated with the newline character. A dash .RB ( \- ) is taken as a regular filename; it doesn't mean standard input. If filenames are given also as command line arguments, they are processed before the filenames read from .IR file . .TP \fB\-\-files0\fR[\fB=\fIfile\fR] This is identical to \fB\-\-files\fR[\fB=\fIfile\fR] except that each filename must be terminated with the null character. . .SS "Basic file format and compression options" .TP \fB\-F\fR \fIformat\fR, \fB\-\-format=\fIformat Specify the file .I format to compress or decompress: .RS .TP .B auto This is the default. When compressing, .B auto is equivalent to .BR xz . When decompressing, the format of the input file is automatically detected. Note that raw streams (created with .BR \-\-format=raw ) cannot be auto-detected. .TP .B xz Compress to the .B .xz file format, or accept only .B .xz files when decompressing. .TP .BR lzma ", " alone Compress to the legacy .B .lzma file format, or accept only .B .lzma files when decompressing. The alternative name .B alone is provided for backwards compatibility with LZMA Utils. .TP .B raw Compress or uncompress a raw stream (no headers). This is meant for advanced users only. To decode raw streams, you need to use .B \-\-format=raw and explicitly specify the filter chain, which normally would have been stored in the container headers. .RE .TP \fB\-C\fR \fIcheck\fR, \fB\-\-check=\fIcheck Specify the type of the integrity check. The check is calculated from the uncompressed data and stored in the .B .xz file. This option has an effect only when compressing into the .B .xz format; the .B .lzma format doesn't support integrity checks. The integrity check (if any) is verified when the .B .xz file is decompressed. .IP "" Supported .I check types: .RS .TP .B none Don't calculate an integrity check at all.
This is usually a bad idea. This can be useful when integrity of the data is verified by other means anyway. .TP .B crc32 Calculate CRC32 using the polynomial from IEEE-802.3 (Ethernet). .TP .B crc64 Calculate CRC64 using the polynomial from ECMA-182. This is the default, since it is slightly better than CRC32 at detecting damaged files and the speed difference is negligible. .TP .B sha256 Calculate SHA-256. This is somewhat slower than CRC32 and CRC64. .RE .IP "" Integrity of the .B .xz headers is always verified with CRC32. It is not possible to change or disable it. .TP .BR \-0 " ... " \-9 Select a compression preset level. The default is .BR \-6 . If multiple preset levels are specified, the last one takes effect. If a custom filter chain was already specified, setting a compression preset level clears the custom filter chain. .IP "" The differences between the presets are more significant than with .BR gzip (1) and .BR bzip2 (1). The selected compression settings determine the memory requirements of the decompressor, thus using a too high preset level might make it painful to decompress the file on an old system with little RAM. Specifically, .B "it's not a good idea to blindly use \-9 for everything" like it often is with .BR gzip (1) and .BR bzip2 (1). .RS .TP .BR "\-0" " ... " "\-3" These are somewhat fast presets. .B \-0 is sometimes faster than .B "gzip \-9" while compressing much better. The higher ones often have speed comparable to .BR bzip2 (1) with comparable or better compression ratio, although the results depend a lot on the type of data being compressed. .TP .BR "\-4" " ... " "\-6" Good to very good compression while keeping decompressor memory usage reasonable even for old systems. .B \-6 is the default, which is usually a good choice e.g. for distributing files that need to be decompressible even on systems with only 16\ MiB RAM. .RB ( \-5e or .B \-6e may be worth considering too. See .BR \-\-extreme .) .TP .B "\-7 ... 
\-9" These are like .B \-6 but with higher compressor and decompressor memory requirements. These are useful only when compressing files bigger than 8\ MiB, 16\ MiB, and 32\ MiB, respectively. .RE .IP "" On the same hardware, the decompression speed is approximately a constant number of bytes of compressed data per second. In other words, the better the compression, the faster the decompression will usually be. This also means that the amount of uncompressed output produced per second can vary a lot. .IP "" The following table summarises the features of the presets: .RS .RS .PP .TS tab(;); c c c c c n n n n n. Preset;DictSize;CompCPU;CompMem;DecMem \-0;256 KiB;0;3 MiB;1 MiB \-1;1 MiB;1;9 MiB;2 MiB \-2;2 MiB;2;17 MiB;3 MiB \-3;4 MiB;3;32 MiB;5 MiB \-4;4 MiB;4;48 MiB;5 MiB \-5;8 MiB;5;94 MiB;9 MiB \-6;8 MiB;6;94 MiB;9 MiB \-7;16 MiB;6;186 MiB;17 MiB \-8;32 MiB;6;370 MiB;33 MiB \-9;64 MiB;6;674 MiB;65 MiB .TE .RE .RE .IP "" Column descriptions: .RS .IP \(bu 3 DictSize is the LZMA2 dictionary size. It is waste of memory to use a dictionary bigger than the size of the uncompressed file. This is why it is good to avoid using the presets .BR \-7 " ... " \-9 when there's no real need for them. At .B \-6 and lower, the amount of memory wasted is usually low enough to not matter. .IP \(bu 3 CompCPU is a simplified representation of the LZMA2 settings that affect compression speed. The dictionary size affects speed too, so while CompCPU is the same for levels .BR \-6 " ... " \-9 , higher levels still tend to be a little slower. To get even slower and thus possibly better compression, see .BR \-\-extreme . .IP \(bu 3 CompMem contains the compressor memory requirements in the single-threaded mode. It may vary slightly between .B xz versions. Memory requirements of some of the future multithreaded modes may be dramatically higher than that of the single-threaded mode. .IP \(bu 3 DecMem contains the decompressor memory requirements. 
That is, the compression settings determine the memory requirements of the decompressor. -The exact decompressor memory usage is slighly more than +The exact decompressor memory usage is slightly more than the LZMA2 dictionary size, but the values in the table have been rounded up to the next full MiB. .RE .TP .BR \-e ", " \-\-extreme Use a slower variant of the selected compression preset level .RB ( \-0 " ... " \-9 ) to hopefully get a little bit better compression ratio, but with bad luck this can also make it worse. Decompressor memory usage is not affected, but compressor memory usage increases a little at preset levels .BR \-0 " ... " \-3 . .IP "" Since there are two presets with dictionary sizes 4\ MiB and 8\ MiB, the presets .B \-3e and .B \-5e use slightly faster settings (lower CompCPU) than .B \-4e and .BR \-6e , respectively. That way no two presets are identical. .RS .RS .PP .TS tab(;); c c c c c n n n n n. Preset;DictSize;CompCPU;CompMem;DecMem \-0e;256 KiB;8;4 MiB;1 MiB \-1e;1 MiB;8;13 MiB;2 MiB \-2e;2 MiB;8;25 MiB;3 MiB \-3e;4 MiB;7;48 MiB;5 MiB \-4e;4 MiB;8;48 MiB;5 MiB \-5e;8 MiB;7;94 MiB;9 MiB \-6e;8 MiB;8;94 MiB;9 MiB \-7e;16 MiB;8;186 MiB;17 MiB \-8e;32 MiB;8;370 MiB;33 MiB \-9e;64 MiB;8;674 MiB;65 MiB .TE .RE .RE .IP "" For example, there are a total of four presets that use 8\ MiB dictionary, whose order from the fastest to the slowest is .BR \-5 , .BR \-6 , .BR \-5e , and .BR \-6e . .TP .B \-\-fast .PD 0 .TP .B \-\-best .PD These are somewhat misleading aliases for .B \-0 and .BR \-9 , respectively. These are provided only for backwards compatibility with LZMA Utils. Avoid using these options. .TP .BI \-\-memlimit\-compress= limit Set a memory usage limit for compression. If this option is specified multiple times, the last one takes effect. .IP "" If the compression settings exceed the .IR limit , .B xz will adjust the settings downwards so that the limit is no longer exceeded and display a notice that automatic adjustment was done. 
Such adjustments are not made when compressing with .B \-\-format=raw or if .B \-\-no\-adjust has been specified. In those cases, an error is displayed and .B xz will exit with exit status 1. .IP "" The .I limit can be specified in multiple ways: .RS .IP \(bu 3 The .I limit can be an absolute value in bytes. Using an integer suffix like .B MiB can be useful. Example: .B "\-\-memlimit\-compress=80MiB" .IP \(bu 3 The .I limit can be specified as a percentage of total physical memory (RAM). This can be useful especially when setting the .B XZ_DEFAULTS environment variable in a shell initialization script that is shared between different computers. That way the limit is automatically bigger on systems with more memory. Example: .B "\-\-memlimit\-compress=70%" .IP \(bu 3 The .I limit can be reset back to its default value by setting it to .BR 0 . This is currently equivalent to setting the .I limit to .B max (no memory usage limit). Once multithreading support has been implemented, there may be a difference between .B 0 and .B max for the multithreaded case, so it is recommended to use .B 0 instead of .B max until the details have been decided. .RE .IP "" See also the section .BR "Memory usage" . .TP .BI \-\-memlimit\-decompress= limit Set a memory usage limit for decompression. This also affects the .B \-\-list mode. If the operation is not possible without exceeding the .IR limit , .B xz will display an error and decompressing the file will fail. See .BI \-\-memlimit\-compress= limit for possible ways to specify the .IR limit . .TP \fB\-M\fR \fIlimit\fR, \fB\-\-memlimit=\fIlimit\fR, \fB\-\-memory=\fIlimit This is equivalent to specifying \fB\-\-memlimit\-compress=\fIlimit \fB\-\-memlimit\-decompress=\fIlimit\fR. .TP .B \-\-no\-adjust Display an error and exit if the compression settings exceed the memory usage limit. The default is to adjust the settings downwards so that the memory usage limit is not exceeded. 
Automatic adjusting is always disabled when creating raw streams .RB ( \-\-format=raw ). .TP \fB\-T\fR \fIthreads\fR, \fB\-\-threads=\fIthreads Specify the number of worker threads to use. The actual number of threads can be less than .I threads if using more threads would exceed the memory usage limit. .IP "" .B "Multithreaded compression and decompression are not" .B "implemented yet, so this option has no effect for now." .IP "" .B "As of writing (2010-09-27), it hasn't been decided" .B "if threads will be used by default on multicore systems" .B "once support for threading has been implemented." .B "Comments are welcome." The complicating factor is that using many threads will increase the memory usage dramatically. Note that if multithreading will be the default, it will probably be done so that single-threaded and multithreaded modes produce the same output, so compression ratio won't be significantly affected if threading will be enabled by default. . .SS "Custom compressor filter chains" A custom filter chain allows specifying the compression settings in detail instead of relying on -the settings associated to the preset levels. +the settings associated to the presets. When a custom filter chain is specified, -the compression preset level options -(\fB\-0\fR ... \fB\-9\fR and \fB\-\-extreme\fR) are -silently ignored. +preset options (\fB\-0\fR ... \fB\-9\fR and \fB\-\-extreme\fR) +earlier on the command line are forgotten. +If a preset option is specified +after one or more custom filter chain options, +the new preset takes effect and +the custom filter chain options specified earlier are forgotten. .PP A filter chain is comparable to piping on the command line. When compressing, the uncompressed input goes to the first filter, whose output goes to the next filter (if any). The output of the last filter gets written to the compressed file. The maximum number of filters in the chain is four, but typically a filter chain has only one or two filters. 
.PP Many filters have limitations on where they can be in the filter chain: some filters can work only as the last filter in the chain, some only as a non-last filter, and some work in any position in the chain. Depending on the filter, this limitation is either inherent to the filter design or exists to prevent security issues. .PP A custom filter chain is specified by using one or more filter options in the order they are wanted in the filter chain. That is, the order of filter options is significant! When decoding raw streams .RB ( \-\-format=raw ), the filter chain is specified in the same order as it was specified when compressing. .PP Filters take filter-specific .I options as a comma-separated list. Extra commas in .I options are ignored. Every option has a default value, so you need to specify only those you want to change. +.PP +To see the whole filter chain and +.IR options , +use +.B "xz \-vv" +(that is, use +.B \-\-verbose +twice). +This works also for viewing the filter chain options used by presets. .TP \fB\-\-lzma1\fR[\fB=\fIoptions\fR] .PD 0 .TP \fB\-\-lzma2\fR[\fB=\fIoptions\fR] .PD Add LZMA1 or LZMA2 filter to the filter chain. These filters can be used only as the last filter in the chain. .IP "" LZMA1 is a legacy filter, which is supported almost solely due to the legacy .B .lzma file format, which supports only LZMA1. LZMA2 is an updated version of LZMA1 to fix some practical issues of LZMA1. The .B .xz format uses LZMA2 and doesn't support LZMA1 at all. Compression speed and ratios of LZMA1 and LZMA2 are practically the same. .IP "" LZMA1 and LZMA2 share the same set of .IR options : .RS .TP .BI preset= preset Reset all LZMA1 or LZMA2 .I options to .IR preset . .I Preset consists of an integer, which may be followed by single-letter preset modifiers. The integer can be from .B 0 to .BR 9 , matching the command line options \fB\-0\fR ... \fB\-9\fR. The only supported modifier is currently .BR e , which matches .BR \-\-extreme .
-The default -.I preset -is -.BR 6 , -from which the default values for the rest of the LZMA1 or LZMA2 +If no +.B preset +is specified, the default values of LZMA1 or LZMA2 .I options -are taken. +are taken from the preset +.BR 6 . .TP .BI dict= size Dictionary (history buffer) .I size indicates how many bytes of the recently processed uncompressed data is kept in memory. The algorithm tries to find repeating byte sequences (matches) in the uncompressed data, and replace them with references to the data currently in the dictionary. The bigger the dictionary, the higher is the chance to find a match. Thus, increasing dictionary .I size usually improves compression ratio, but a dictionary bigger than the uncompressed file is waste of memory. .IP "" Typical dictionary .I size is from 64\ KiB to 64\ MiB. The minimum is 4\ KiB. The maximum for compression is currently 1.5\ GiB (1536\ MiB). The decompressor already supports dictionaries up to one byte less than 4\ GiB, which is the maximum for the LZMA1 and LZMA2 stream formats. .IP "" Dictionary .I size and match finder .RI ( mf ) together determine the memory usage of the LZMA1 or LZMA2 encoder. The same (or bigger) dictionary .I size is required for decompressing that was used when compressing, thus the memory usage of the decoder is determined by the dictionary size used when compressing. The .B .xz headers store the dictionary .I size either as .RI "2^" n or .RI "2^" n " + 2^(" n "\-1)," so these .I sizes are somewhat preferred for compression. Other .I sizes will get rounded up when stored in the .B .xz headers. .TP .BI lc= lc Specify the number of literal context bits. The minimum is 0 and the maximum is 4; the default is 3. In addition, the sum of .I lc and .I lp must not exceed 4. .IP "" All bytes that cannot be encoded as matches are encoded as literals. That is, literals are simply 8-bit bytes that are encoded one at a time. 
.IP "" The literal coding makes an assumption that the highest .I lc bits of the previous uncompressed byte correlate with the next byte. E.g. in typical English text, an upper-case letter is often followed by a lower-case letter, and a lower-case letter is usually followed by another lower-case letter. In the US-ASCII character set, the highest three bits are 010 for upper-case letters and 011 for lower-case letters. When .I lc is at least 3, the literal coding can take advantage of this property in the uncompressed data. .IP "" The default value (3) is usually good. If you want maximum compression, test .BR lc=4 . Sometimes it helps a little, and sometimes it makes compression worse. If it makes it worse, test e.g.\& .B lc=2 too. .TP .BI lp= lp Specify the number of literal position bits. The minimum is 0 and the maximum is 4; the default is 0. .IP "" .I Lp affects what kind of alignment in the uncompressed data is assumed when encoding literals. See .I pb below for more information about alignment. .TP .BI pb= pb Specify the number of position bits. The minimum is 0 and the maximum is 4; the default is 2. .IP "" .I Pb affects what kind of alignment in the uncompressed data is assumed in general. The default means four-byte alignment .RI (2^ pb =2^2=4), which is often a good choice when there's no better guess. .IP "" When the alignment is known, setting .I pb accordingly may reduce the file size a little. E.g. with text files having one-byte alignment (US-ASCII, ISO-8859-*, UTF-8), setting .B pb=0 can improve compression slightly. For UTF-16 text, .B pb=1 is a good choice. If the alignment is an odd number like 3 bytes, .B pb=0 might be the best choice. .IP "" Even though the assumed alignment can be adjusted with .I pb and .IR lp , LZMA1 and LZMA2 still slightly favor 16-byte alignment. It might be worth taking into account when designing file formats that are likely to be often compressed with LZMA1 or LZMA2.
.TP .BI mf= mf Match finder has a major effect on encoder speed, memory usage, and compression ratio. Usually Hash Chain match finders are faster than Binary Tree match finders. The default depends on the .IR preset : 0 uses .BR hc3 , 1\-3 use .BR hc4 , and the rest use .BR bt4 . .IP "" The following match finders are supported. The memory usage formulas below are rough approximations, which are closest to the reality when .I dict is a power of two. .RS .TP .B hc3 Hash Chain with 2- and 3-byte hashing .br Minimum value for .IR nice : 3 .br Memory usage: .br .I dict * 7.5 (if .I dict <= 16 MiB); .br .I dict * 5.5 + 64 MiB (if .I dict > 16 MiB) .TP .B hc4 Hash Chain with 2-, 3-, and 4-byte hashing .br Minimum value for .IR nice : 4 .br Memory usage: .br .I dict * 7.5 (if .I dict <= 32 MiB); .br .I dict * 6.5 (if .I dict > 32 MiB) .TP .B bt2 Binary Tree with 2-byte hashing .br Minimum value for .IR nice : 2 .br Memory usage: .I dict * 9.5 .TP .B bt3 Binary Tree with 2- and 3-byte hashing .br Minimum value for .IR nice : 3 .br Memory usage: .br .I dict * 11.5 (if .I dict <= 16 MiB); .br .I dict * 9.5 + 64 MiB (if .I dict > 16 MiB) .TP .B bt4 Binary Tree with 2-, 3-, and 4-byte hashing .br Minimum value for .IR nice : 4 .br Memory usage: .br .I dict * 11.5 (if .I dict <= 32 MiB); .br .I dict * 10.5 (if .I dict > 32 MiB) .RE .TP .BI mode= mode Compression .I mode specifies the method to analyze the data produced by the match finder. Supported .I modes are .B fast and .BR normal . The default is .B fast for .I presets 0\-3 and .B normal for .I presets 4\-9. .IP "" Usually .B fast is used with Hash Chain match finders and .B normal with Binary Tree match finders. This is also what the .I presets do. .TP .BI nice= nice Specify what is considered to be a nice length for a match. Once a match of at least .I nice bytes is found, the algorithm stops looking for possibly better matches. .IP "" .I Nice can be 2\-273 bytes. 
Higher values tend to give better compression ratio at the expense of speed. The default depends on the .IR preset . .TP .BI depth= depth Specify the maximum search depth in the match finder. The default is the special value of 0, which makes the compressor determine a reasonable .I depth from .I mf and .IR nice . .IP "" Reasonable .I depth for Hash Chains is 4\-100 and 16\-1000 for Binary Trees. Using very high values for .I depth can make the encoder extremely slow with some files. Avoid setting the .I depth over 1000 unless you are prepared to interrupt the compression in case it is taking far too long. .RE .IP "" When decoding raw streams .RB ( \-\-format=raw ), LZMA2 needs only the dictionary .IR size . LZMA1 needs also .IR lc , .IR lp , and .IR pb . .TP \fB\-\-x86\fR[\fB=\fIoptions\fR] .PD 0 .TP \fB\-\-powerpc\fR[\fB=\fIoptions\fR] .TP \fB\-\-ia64\fR[\fB=\fIoptions\fR] .TP \fB\-\-arm\fR[\fB=\fIoptions\fR] .TP \fB\-\-armthumb\fR[\fB=\fIoptions\fR] .TP \fB\-\-sparc\fR[\fB=\fIoptions\fR] .PD Add a branch/call/jump (BCJ) filter to the filter chain. These filters can be used only as a non-last filter in the filter chain. .IP "" A BCJ filter converts relative addresses in the machine code to their absolute counterparts. This doesn't change the size of the data, but it increases redundancy, which can help LZMA2 to produce 0\-15\ % smaller .B .xz file. The BCJ filters are always reversible, so using a BCJ filter for wrong type of data doesn't cause any data loss, although it may make the compression ratio slightly worse. .IP "" It is fine to apply a BCJ filter on a whole executable; there's no need to apply it only on the executable section. Applying a BCJ filter on an archive that contains both executable and non-executable files may or may not give good results, so it generally isn't good to blindly apply a BCJ filter when compressing binary packages for distribution. .IP "" These BCJ filters are very fast and use insignificant amount of memory. 
If a BCJ filter improves compression ratio of a file, it can improve decompression speed at the same time. This is because, on the same hardware, the decompression speed of LZMA2 is roughly a fixed number of bytes of compressed data per second. .IP "" These BCJ filters have known problems related to the compression ratio: .RS .IP \(bu 3 Some types of files containing executable code (e.g. object files, static libraries, and Linux kernel modules) have the addresses in the instructions filled with filler values. These BCJ filters will still do the address conversion, which will make the compression worse with these files. .IP \(bu 3 Applying a BCJ filter on an archive containing multiple similar executables can make the compression ratio worse than not using a BCJ filter. This is because the BCJ filter doesn't detect the boundaries of the executable files, and doesn't reset the address conversion counter for each executable. .RE .IP "" Both of the above problems will be fixed in the future in a new filter. The old BCJ filters will still be useful in embedded systems, because the decoder of the new filter will be bigger and use more memory. .IP "" Different instruction sets have different alignment: .RS .RS .PP .TS tab(;); l n l l n l. Filter;Alignment;Notes x86;1;32-bit or 64-bit x86 PowerPC;4;Big endian only ARM;4;Little endian only ARM-Thumb;2;Little endian only IA-64;16;Big or little endian SPARC;4;Big or little endian .TE .RE .RE .IP "" Since the BCJ-filtered data is usually compressed with LZMA2, the compression ratio may be improved slightly if the LZMA2 options are set to match the alignment of the selected BCJ filter. For example, with the IA-64 filter, it's good to set .B pb=4 with LZMA2 (2^4=16). The x86 filter is an exception; it's usually good to stick to LZMA2's default four-byte alignment when compressing x86 executables.
.IP "" All BCJ filters support the same .IR options : .RS .TP .BI start= offset Specify the start .I offset that is used when converting between relative and absolute addresses. The .I offset must be a multiple of the alignment of the filter (see the table above). The default is zero. In practice, the default is good; specifying a custom .I offset is almost never useful. .RE .TP \fB\-\-delta\fR[\fB=\fIoptions\fR] Add the Delta filter to the filter chain. The Delta filter can be only used as a non-last filter in the filter chain. .IP "" Currently only simple byte-wise delta calculation is supported. It can be useful when compressing e.g. uncompressed bitmap images or uncompressed PCM audio. However, special purpose algorithms may give significantly better results than Delta + LZMA2. This is true especially with audio, which compresses faster and better e.g. with .BR flac (1). .IP "" Supported .IR options : .RS .TP .BI dist= distance Specify the .I distance of the delta calculation in bytes. .I distance must be 1\-256. The default is 1. .IP "" For example, with .B dist=2 and eight-byte input A1 B1 A2 B3 A3 B5 A4 B7, the output will be A1 B1 01 02 01 02 01 02. .RE . .SS "Other options" .TP .BR \-q ", " \-\-quiet Suppress warnings and notices. Specify this twice to suppress errors too. This option has no effect on the exit status. That is, even if a warning was suppressed, the exit status to indicate a warning is still used. .TP .BR \-v ", " \-\-verbose Be verbose. If standard error is connected to a terminal, .B xz will display a progress indicator. Specifying .B \-\-verbose twice will give even more verbose output. .IP "" The progress indicator shows the following information: .RS .IP \(bu 3 Completion percentage is shown if the size of the input file is known. That is, the percentage cannot be shown in pipes. .IP \(bu 3 Amount of compressed data produced (compressing) or consumed (decompressing). 
.IP \(bu 3 Amount of uncompressed data consumed (compressing) or produced (decompressing). .IP \(bu 3 Compression ratio, which is calculated by dividing the amount of compressed data processed so far by the amount of uncompressed data processed so far. .IP \(bu 3 Compression or decompression speed. This is measured as the amount of uncompressed data consumed (compression) or produced (decompression) per second. It is shown after a few seconds have passed since .B xz started processing the file. .IP \(bu 3 Elapsed time in the format M:SS or H:MM:SS. .IP \(bu 3 Estimated remaining time is shown only when the size of the input file is known and a couple of seconds have already passed since .B xz started processing the file. The time is shown in a less precise format which never has any colons, e.g. 2 min 30 s. .RE .IP "" When standard error is not a terminal, .B \-\-verbose will make .B xz print the filename, compressed size, uncompressed size, compression ratio, and possibly also the speed and elapsed time on a single line to standard error after compressing or decompressing the file. The speed and elapsed time are included only when the operation took at least a few seconds. If the operation didn't finish, e.g. due to user interruption, also the completion percentage is printed if the size of the input file is known. .TP .BR \-Q ", " \-\-no\-warn Don't set the exit status to 2 even if a condition worth a warning was detected. This option doesn't affect the verbosity level, thus both .B \-\-quiet and .B \-\-no\-warn have to be used to not display warnings and to not alter the exit status. .TP .B \-\-robot Print messages in a machine-parsable format. This is intended to ease writing frontends that want to use .B xz instead of liblzma, which may be the case with various scripts. The output with this option enabled is meant to be stable across .B xz releases. See the section .B "ROBOT MODE" for details. 
.TP .BR \-\-info\-memory Display, in human-readable format, how much physical memory (RAM) .B xz thinks the system has and the memory usage limits for compression and decompression, and exit successfully. .TP .BR \-h ", " \-\-help Display a help message describing the most commonly used options, and exit successfully. .TP .BR \-H ", " \-\-long\-help Display a help message describing all features of .BR xz , and exit successfully. .TP .BR \-V ", " \-\-version Display the version number of .B xz and liblzma in human readable format. To get machine-parsable output, specify .B \-\-robot before .BR \-\-version . . .SH "ROBOT MODE" The robot mode is activated with the .B \-\-robot option. It makes the output of .B xz easier to parse by other programs. Currently .B \-\-robot is supported only together with .BR \-\-version , .BR \-\-info\-memory , and .BR \-\-list . -It will be supported for normal compression and +It will be supported for compression and decompression in the future. . .SS Version .B "xz \-\-robot \-\-version" will print the version number of .B xz and liblzma in the following format: .PP .BI XZ_VERSION= XYYYZZZS .br .BI LIBLZMA_VERSION= XYYYZZZS .TP .I X Major version. .TP .I YYY Minor version. Even numbers are stable. Odd numbers are alpha or beta versions. .TP .I ZZZ Patch level for stable releases or just a counter for development releases. .TP .I S Stability. 0 is alpha, 1 is beta, and 2 is stable. .I S should be always 2 when .I YYY is even. .PP .I XYYYZZZS are the same on both lines if .B xz and liblzma are from the same XZ Utils release. .PP Examples: 4.999.9beta is .B 49990091 and 5.0.0 is .BR 50000002 . . .SS "Memory limit information" .B "xz \-\-robot \-\-info\-memory" prints a single line with three tab-separated columns: .IP 1. 4 Total amount of physical memory (RAM) in bytes .IP 2. 4 Memory usage limit for compression in bytes. A special value of zero indicates the default setting, which for single-threaded mode is the same as no limit. .IP 3.
4 Memory usage limit for decompression in bytes. A special value of zero indicates the default setting, which for single-threaded mode is the same as no limit. .PP In the future, the output of .B "xz \-\-robot \-\-info\-memory" may have more columns, but never more than a single line. . .SS "List mode" .B "xz \-\-robot \-\-list" uses tab-separated output. The first column of every line has a string that indicates the type of the information found on that line: .TP .B name This is always the first line when starting to list a file. The second column on the line is the filename. .TP .B file This line contains overall information about the .B .xz file. This line is always printed after the .B name line. .TP .B stream This line type is used only when .B \-\-verbose was specified. There are as many .B stream lines as there are streams in the .B .xz file. .TP .B block This line type is used only when .B \-\-verbose was specified. There are as many .B block lines as there are blocks in the .B .xz file. The .B block lines are shown after all the .B stream lines; different line types are not interleaved. .TP .B summary This line type is used only when .B \-\-verbose was specified twice. This line is printed after all .B block lines. Like the .B file line, the .B summary line contains overall information about the .B .xz file. .TP .B totals This line is always the very last line of the list output. It shows the total counts and sizes. .PP The columns of the .B file lines: .PD 0 .RS .IP 2. 4 Number of streams in the file .IP 3. 4 Total number of blocks in the stream(s) .IP 4. 4 Compressed size of the file .IP 5. 4 Uncompressed size of the file .IP 6. 4 Compression ratio, for example .BR 0.123. If ratio is over 9.999, three dashes .RB ( \-\-\- ) are displayed instead of the ratio. .IP 7. 4 Comma-separated list of integrity check names. The following strings are used for the known check types: .BR None , .BR CRC32 , .BR CRC64 , and .BR SHA\-256 . 
For unknown check types, .BI Unknown\- N is used, where .I N is the Check ID as a decimal number (one or two digits). .IP 8. 4 Total size of stream padding in the file .RE .PD .PP The columns of the .B stream lines: .PD 0 .RS .IP 2. 4 Stream number (the first stream is 1) .IP 3. 4 Number of blocks in the stream .IP 4. 4 Compressed start offset .IP 5. 4 Uncompressed start offset .IP 6. 4 Compressed size (does not include stream padding) .IP 7. 4 Uncompressed size .IP 8. 4 Compression ratio .IP 9. 4 Name of the integrity check .IP 10. 4 Size of stream padding .RE .PD .PP The columns of the .B block lines: .PD 0 .RS .IP 2. 4 Number of the stream containing this block .IP 3. 4 Block number relative to the beginning of the stream (the first block is 1) .IP 4. 4 Block number relative to the beginning of the file .IP 5. 4 Compressed start offset relative to the beginning of the file .IP 6. 4 Uncompressed start offset relative to the beginning of the file .IP 7. 4 Total compressed size of the block (includes headers) .IP 8. 4 Uncompressed size .IP 9. 4 Compression ratio .IP 10. 4 Name of the integrity check .RE .PD .PP If .B \-\-verbose was specified twice, additional columns are included on the .B block lines. These are not displayed with a single .BR \-\-verbose , because getting this information requires many seeks and can thus be slow: .PD 0 .RS .IP 11. 4 Value of the integrity check in hexadecimal .IP 12. 4 Block header size .IP 13. 4 Block flags: .B c indicates that compressed size is present, and .B u indicates that uncompressed size is present. If the flag is not set, a dash .RB ( \- ) is shown instead to keep the string length fixed. New flags may be added to the end of the string in the future. .IP 14. 4 Size of the actual compressed data in the block (this excludes the block header, block padding, and check fields) .IP 15. 4 Amount of memory (in bytes) required to decompress this block with this .B xz version .IP 16. 4 Filter chain. 
Note that most of the options used at compression time cannot be known, because only the options that are needed for decompression are stored in the .B .xz headers. .RE .PD .PP The columns of the .B summary lines: .PD 0 .RS .IP 2. 4 Amount of memory (in bytes) required to decompress this file with this .B xz version .IP 3. 4 .B yes or .B no indicating if all block headers have both compressed size and uncompressed size stored in them .RE .PD .PP The columns of the .B totals line: .PD 0 .RS .IP 2. 4 Number of streams .IP 3. 4 Number of blocks .IP 4. 4 Compressed size .IP 5. 4 Uncompressed size .IP 6. 4 Average compression ratio .IP 7. 4 Comma-separated list of integrity check names that were present in the files .IP 8. 4 Stream padding size .IP 9. 4 Number of files. This is here to keep the order of the earlier columns the same as on .B file lines. .PD .RE .PP If .B \-\-verbose was specified twice, additional columns are included on the .B totals line: .PD 0 .RS .IP 10. 4 Maximum amount of memory (in bytes) required to decompress the files with this .B xz version .IP 11. 4 .B yes or .B no indicating if all block headers have both compressed size and uncompressed size stored in them .RE .PD .PP Future versions may add new line types and new columns can be added to the existing line types, but the existing columns won't be changed. . .SH "EXIT STATUS" .TP .B 0 All is good. .TP .B 1 An error occurred. .TP .B 2 Something worth a warning occurred, but no actual errors occurred. .PP Notices (not warnings or errors) printed on standard error don't affect the exit status. . .SH ENVIRONMENT .B xz parses space-separated lists of options from the environment variables .B XZ_DEFAULTS and .BR XZ_OPT , in this order, before parsing the options from the command line. Note that only options are parsed from the environment variables; all non-options are silently ignored. Parsing is done with .BR getopt_long (3) which is used also for the command line arguments. 
.TP .B XZ_DEFAULTS User-specific or system-wide default options. Typically this is set in a shell initialization script to enable .BR xz 's memory usage limiter by default. Excluding shell initialization scripts and similar special cases, scripts must never set or unset .BR XZ_DEFAULTS . .TP .B XZ_OPT This is for passing options to .B xz when it is not possible to set the options directly on the .B xz command line. This is the case e.g. when .B xz is run by a script or tool, e.g. GNU .BR tar (1): .RS .RS .PP .nf .ft CW XZ_OPT=\-2v tar caf foo.tar.xz foo .ft R .fi .RE .RE .IP "" Scripts may use .B XZ_OPT e.g. to set script-specific default compression options. It is still recommended to allow users to override .B XZ_OPT if that is reasonable, e.g. in .BR sh (1) scripts one may use something like this: .RS .RS .PP .nf .ft CW XZ_OPT=${XZ_OPT\-"\-7e"} export XZ_OPT .ft R .fi .RE .RE . .SH "LZMA UTILS COMPATIBILITY" The command line syntax of .B xz is practically a superset of .BR lzma , .BR unlzma , and .BR lzcat as found from LZMA Utils 4.32.x. In most cases, it is possible to replace LZMA Utils with XZ Utils without breaking existing scripts. There are some incompatibilities though, which may sometimes cause problems. . .SS "Compression preset levels" The numbering of the compression level presets is not identical in .B xz and LZMA Utils. The most important difference is how dictionary sizes are mapped to different presets. Dictionary size is roughly equal to the decompressor memory usage. .RS .PP .TS tab(;); c c c c n n. Level;xz;LZMA Utils \-0;256 KiB;N/A \-1;1 MiB;64 KiB \-2;2 MiB;1 MiB \-3;4 MiB;512 KiB \-4;4 MiB;1 MiB \-5;8 MiB;2 MiB \-6;8 MiB;4 MiB \-7;16 MiB;8 MiB \-8;32 MiB;16 MiB \-9;64 MiB;32 MiB .TE .RE .PP The dictionary size differences affect the compressor memory usage too, but there are some other differences between LZMA Utils and XZ Utils, which make the difference even bigger: .RS .PP .TS tab(;); c c c c n n. 
Level;xz;LZMA Utils 4.32.x \-0;3 MiB;N/A \-1;9 MiB;2 MiB \-2;17 MiB;12 MiB \-3;32 MiB;12 MiB \-4;48 MiB;16 MiB \-5;94 MiB;26 MiB \-6;94 MiB;45 MiB \-7;186 MiB;83 MiB \-8;370 MiB;159 MiB \-9;674 MiB;311 MiB .TE .RE .PP The default preset level in LZMA Utils is .B \-7 while in XZ Utils it is .BR \-6 , so both use an 8 MiB dictionary by default. . .SS "Streamed vs. non-streamed .lzma files" The uncompressed size of the file can be stored in the .B .lzma header. LZMA Utils does that when compressing regular files. The alternative is to mark that uncompressed size is unknown and use end-of-payload marker to indicate where the decompressor should stop. LZMA Utils uses this method when uncompressed size isn't known, which is the case for example in pipes. .PP .B xz supports decompressing .B .lzma files with or without end-of-payload marker, but all .B .lzma files created by .B xz will use end-of-payload marker and have uncompressed size marked as unknown in the .B .lzma header. This may be a problem in some uncommon situations. For example, a .B .lzma decompressor in an embedded device might work only with files that have known uncompressed size. If you hit this problem, you need to use LZMA Utils or LZMA SDK to create .B .lzma files with known uncompressed size. . .SS "Unsupported .lzma files" The .B .lzma format allows .I lc values up to 8, and .I lp values up to 4. LZMA Utils can decompress files with any .I lc and .IR lp , but always creates files with .B lc=3 and .BR lp=0 . Creating files with other .I lc and .I lp is possible with .B xz and with LZMA SDK. .PP The implementation of the LZMA1 filter in liblzma requires that the sum of .I lc and .I lp must not exceed 4. Thus, .B .lzma files, which exceed this limitation, cannot be decompressed with .BR xz . .PP LZMA Utils creates only .B .lzma files which have a dictionary size of .RI "2^" n (a power of 2) but accepts files with any dictionary size. 
liblzma accepts only .B .lzma files which have a dictionary size of .RI "2^" n or .RI "2^" n " + 2^(" n "\-1)." This is to decrease false positives when detecting .B .lzma files. .PP These limitations shouldn't be a problem in practice, since practically all .B .lzma files have been compressed with settings that liblzma will accept. . .SS "Trailing garbage" When decompressing, LZMA Utils silently ignore everything after the first .B .lzma stream. In most situations, this is a bug. This also means that LZMA Utils don't support decompressing concatenated .B .lzma files. .PP If there is data left after the first .B .lzma stream, .B xz considers the file to be corrupt. This may break obscure scripts which have assumed that trailing garbage is ignored. . .SH NOTES . .SS "Compressed output may vary" The exact compressed output produced from the same uncompressed input file may vary between XZ Utils versions even if compression options are identical. This is because the encoder can be improved (faster or better compression) without affecting the file format. The output can vary even between different builds of the same XZ Utils version, if different build options are used. .PP -The above means that implementing +The above means that once .B \-\-rsyncable -to create rsyncable -.B .xz -files is not going to happen without -freezing a part of the encoder -implementation, which can then be used with -.BR \-\-rsyncable . +has been implemented, +the resulting files won't necessarily be rsyncable +unless both old and new files have been compressed +with the same xz version. +This problem can be fixed if a part of the encoder +implementation is frozen to keep rsyncable output +stable across xz versions. . .SS "Embedded .xz decompressors" Embedded .B .xz decompressor implementations like XZ Embedded don't necessarily support files created with integrity .I check types other than .B none and .BR crc32 . 
Since the default is .BR \-\-check=crc64 , you must use .B \-\-check=none or .B \-\-check=crc32 when creating files for embedded systems. .PP Outside embedded systems, all .B .xz format decompressors support all the .I check types, or at least are able to decompress the file without verifying the integrity check if the particular .I check is not supported. .PP XZ Embedded supports BCJ filters, but only with the default start offset. . .SH EXAMPLES . .SS Basics Compress the file .I foo into .I foo.xz using the default compression level .RB ( \-6 ), and remove .I foo if compression is successful: .RS .PP .nf .ft CW xz foo .ft R .fi .RE .PP Decompress .I bar.xz into .I bar and don't remove .I bar.xz even if decompression is successful: .RS .PP .nf .ft CW xz \-dk bar.xz .ft R .fi .RE .PP Create .I baz.tar.xz with the preset .B \-4e .RB ( "\-4 \-\-extreme" ), which is slower than e.g. the default .BR \-6 , but needs less memory for compression and decompression (48\ MiB and 5\ MiB, respectively): .RS .PP .nf .ft CW tar cf \- baz | xz \-4e > baz.tar.xz .ft R .fi .RE .PP A mix of compressed and uncompressed files can be decompressed to standard output with a single command: .RS .PP .nf .ft CW xz \-dcf a.txt b.txt.xz c.txt d.txt.lzma > abcd.txt .ft R .fi .RE . .SS "Parallel compression of many files" On GNU and *BSD, .BR find (1) and .BR xargs (1) can be used to parallelize compression of many files: .RS .PP .nf .ft CW find . \-type f \e! \-name '*.xz' \-print0 \e | xargs \-0r \-P4 \-n16 xz \-T1 .ft R .fi .RE .PP The .B \-P option to .BR xargs (1) sets the number of parallel .B xz processes. The best value for the .B \-n option depends on how many files there are to be compressed. If there are only a couple of files, the value should probably be 1; with tens of thousands of files, 100 or even more may be appropriate to reduce the number of .B xz processes that .BR xargs (1) will eventually create. 
.PP The option .B \-T1 for .B xz is there to force it to single-threaded mode, because .BR xargs (1) is used to control the amount of parallelization. . .SS "Robot mode" Calculate how many bytes have been saved in total after compressing multiple files: .RS .PP .nf .ft CW xz \-\-robot \-\-list *.xz | awk '/^totals/{print $5\-$4}' .ft R .fi .RE .PP A script may want to know that it is using new enough .BR xz . The following .BR sh (1) script checks that the version number of the .B xz tool is at least 5.0.0. This method is compatible with old beta versions, which didn't support the .B \-\-robot option: .RS .PP .nf .ft CW if ! eval "$(xz \-\-robot \-\-version 2> /dev/null)" || [ "$XZ_VERSION" \-lt 50000002 ]; then echo "Your xz is too old." fi unset XZ_VERSION LIBLZMA_VERSION .ft R .fi .RE .PP Set a memory usage limit for decompression using .BR XZ_OPT , but if a limit has already been set, don't increase it: .RS .PP .nf .ft CW NEWLIM=$((123 << 20)) # 123 MiB OLDLIM=$(xz \-\-robot \-\-info\-memory | cut \-f3) if [ $OLDLIM \-eq 0 \-o $OLDLIM \-gt $NEWLIM ]; then XZ_OPT="$XZ_OPT \-\-memlimit\-decompress=$NEWLIM" export XZ_OPT fi .ft R .fi .RE . .SS "Custom compressor filter chains" The simplest use for custom filter chains is customizing a LZMA2 preset. This can be useful, because the presets cover only a subset of the potentially useful combinations of compression settings. .PP The CompCPU columns of the tables from the descriptions of the options .BR "\-0" " ... " "\-9" and .B \-\-extreme are useful when customizing LZMA2 presets. Here are the relevant parts collected from those two tables: .RS .PP .TS tab(;); c c n n. Preset;CompCPU \-0;0 \-1;1 \-2;2 \-3;3 \-4;4 \-5;5 \-6;6 \-5e;7 \-6e;8 .TE .RE .PP If you know that a file requires somewhat big dictionary (e.g. 32 MiB) to compress well, but you want to compress it quicker than .B "xz \-8" would do, a preset with a low CompCPU value (e.g. 
1) can be modified to use a bigger dictionary: .RS .PP .nf .ft CW xz \-\-lzma2=preset=1,dict=32MiB foo.tar .ft R .fi .RE .PP With certain files, the above command may be faster than .B "xz \-6" while compressing significantly better. However, it must be emphasized that only some files benefit from a big dictionary while keeping the CompCPU value low. The most obvious situation, where a big dictionary can help a lot, is an archive containing very similar files of at least a few megabytes each. The dictionary size has to be significantly bigger than any individual file to allow LZMA2 to take full advantage of the similarities between consecutive files. .PP If very high compressor and decompressor memory usage is fine, and the file being compressed is at least several hundred megabytes, it may be useful to use an even bigger dictionary than the 64 MiB that .B "xz \-9" would use: .RS .PP .nf .ft CW xz \-vv \-\-lzma2=dict=192MiB big_foo.tar .ft R .fi .RE .PP Using .B \-vv .RB ( "\-\-verbose \-\-verbose" ) like in the above example can be useful to see the memory requirements of the compressor and decompressor. Remember that using a dictionary bigger than the size of the uncompressed file is a waste of memory, so the above command isn't useful for small files. .PP Sometimes the compression time doesn't matter, but the decompressor memory usage has to be kept low e.g. to make it possible to decompress the file on an embedded system. The following command uses .B \-6e .RB ( "\-6 \-\-extreme" ) as a base and sets the dictionary to only 64\ KiB. The resulting file can be decompressed with XZ Embedded (that's why there is .BR \-\-check=crc32 ) using about 100\ KiB of memory. .RS .PP .nf .ft CW xz \-\-check=crc32 \-\-lzma2=preset=6e,dict=64KiB foo .ft R .fi .RE .PP If you want to squeeze out as many bytes as possible, adjusting the number of literal context bits .RI ( lc ) and number of position bits .RI ( pb ) can sometimes help.
Adjusting the number of literal position bits .RI ( lp ) might help too, but usually .I lc and .I pb are more important. E.g. a source code archive contains mostly US-ASCII text, so something like the following might give a slightly (like 0.1\ %) smaller file than .B "xz \-6e" (try also without .BR lc=4 ): .RS .PP .nf .ft CW xz \-\-lzma2=preset=6e,pb=0,lc=4 source_code.tar .ft R .fi .RE .PP Using another filter together with LZMA2 can improve compression with certain file types. E.g. to compress an x86-32 or x86-64 shared library using the x86 BCJ filter: .RS .PP .nf .ft CW xz \-\-x86 \-\-lzma2 libfoo.so .ft R .fi .RE .PP Note that the order of the filter options is significant. If .B \-\-x86 is specified after .BR \-\-lzma2 , .B xz will give an error, because there cannot be any filter after LZMA2, and also because the x86 BCJ filter cannot be used as the last filter in the chain. .PP The Delta filter together with LZMA2 can give good results with bitmap images. It should usually beat PNG, which has a few more advanced filters than simple delta but uses Deflate for the actual compression. .PP The image has to be saved in uncompressed format, e.g. as uncompressed TIFF. The distance parameter of the Delta filter is set to match the number of bytes per pixel in the image. E.g. 24-bit RGB bitmap needs .BR dist=3 , and it is also good to pass .B pb=0 to LZMA2 to accommodate the three-byte alignment: .RS .PP .nf .ft CW xz \-\-delta=dist=3 \-\-lzma2=pb=0 foo.tiff .ft R .fi .RE .PP If multiple images have been put into a single archive (e.g.\& .BR .tar ), the Delta filter will work on that too as long as all images have the same number of bytes per pixel. .
.SH "SEE ALSO" .BR xzdec (1), .BR xzdiff (1), .BR xzgrep (1), .BR xzless (1), .BR xzmore (1), .BR gzip (1), .BR bzip2 (1), .BR 7z (1) .PP XZ Utils: .br XZ Embedded: .br LZMA SDK: diff --git a/src/xzdec/xzdec.1 b/src/xzdec/xzdec.1 index 7cc9be5de086..1e5ced945aae 100644 --- a/src/xzdec/xzdec.1 +++ b/src/xzdec/xzdec.1 @@ -1,146 +1,146 @@ .\" .\" Author: Lasse Collin .\" .\" This file has been put into the public domain. .\" You can do whatever you want with this file. .\" -.TH XZDEC 1 "2010-09-27" "Tukaani" "XZ Utils" +.TH XZDEC 1 "2013-06-30" "Tukaani" "XZ Utils" .SH NAME xzdec, lzmadec \- Small .xz and .lzma decompressors .SH SYNOPSIS .B xzdec -.RI [ option ]... -.RI [ file ]... +.RI [ option... ] +.RI [ file... ] .br .B lzmadec -.RI [ option ]... -.RI [ file ]... +.RI [ option... ] +.RI [ file... ] .SH DESCRIPTION .B xzdec is a liblzma-based decompression-only tool for .B .xz (and only .BR .xz ) files. .B xzdec is intended to work as a drop-in replacement for .BR xz (1) in the most common situations where a script has been written to use .B "xz \-\-decompress \-\-stdout" (and possibly a few other commonly used options) to decompress .B .xz files. .B lzmadec is identical to .B xzdec except that .B lzmadec supports .B .lzma files instead of .B .xz files. .PP To reduce the size of the executable, .B xzdec doesn't support multithreading or localization, and doesn't read options from .B XZ_DEFAULTS and .B XZ_OPT environment variables. .B xzdec doesn't support displaying intermediate progress information: sending .B SIGINFO to .B xzdec does nothing, but sending .B SIGUSR1 terminates the process instead of displaying progress information. .SH OPTIONS .TP .BR \-d ", " \-\-decompress ", " \-\-uncompress Ignored for .BR xz (1) compatibility. .B xzdec supports only decompression. .TP .BR \-k ", " \-\-keep Ignored for .BR xz (1) compatibility. .B xzdec never creates or removes any files. .TP .BR \-c ", " \-\-stdout ", " \-\-to-stdout Ignored for .BR xz (1) compatibility. 
.B xzdec always writes the decompressed data to standard output. .TP .BR \-q ", " \-\-quiet Specifying this once does nothing since .B xzdec never displays any warnings or notices. Specify this twice to suppress errors. .TP .BR \-Q ", " \-\-no-warn Ignored for .BR xz (1) compatibility. .B xzdec never uses the exit status 2. .TP .BR \-h ", " \-\-help Display a help message and exit successfully. .TP .BR \-V ", " \-\-version Display the version number of .B xzdec and liblzma. .SH "EXIT STATUS" .TP .B 0 All was good. .TP .B 1 An error occurred. .PP .B xzdec doesn't have any warning messages like .BR xz (1) has, thus the exit status 2 is not used by .BR xzdec . .SH NOTES Use .BR xz (1) instead of .B xzdec or .B lzmadec for normal everyday use. .B xzdec or .B lzmadec are meant only for situations where it is important to have a smaller decompressor than the full-featured .BR xz (1). .PP .B xzdec and .B lzmadec are not really that small. The size can be reduced further by dropping features from liblzma at compile time, but that shouldn't usually be done for executables distributed in typical non-embedded operating system distributions. If you need a truly small .B .xz decompressor, consider using XZ Embedded. .SH "SEE ALSO" .BR xz (1) .PP XZ Embedded: diff --git a/src/xzdec/xzdec.c b/src/xzdec/xzdec.c index b7830db4b523..5cb7530afce4 100644 --- a/src/xzdec/xzdec.c +++ b/src/xzdec/xzdec.c @@ -1,323 +1,323 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file xzdec.c /// \brief Simple single-threaded tool to uncompress .xz or .lzma files // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. 
// /////////////////////////////////////////////////////////////////////////////// #include "sysdefs.h" #include "lzma.h" #include #include #include #include #include "getopt.h" #include "tuklib_progname.h" #include "tuklib_exit.h" #ifdef TUKLIB_DOSLIKE # include # include #endif #ifdef LZMADEC # define TOOL_FORMAT "lzma" #else # define TOOL_FORMAT "xz" #endif /// Error messages are suppressed if this is zero, which is the case when /// --quiet has been given at least twice. static unsigned int display_errors = 2; /// Prints the program name, the printf-style message, and a newline to /// stderr. Nothing is printed when display_errors is zero. static void lzma_attribute((__format__(__printf__, 1, 2))) my_errorf(const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (display_errors) { fprintf(stderr, "%s: ", progname); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); } va_end(ap); return; } /// Prints the usage text to stdout and then calls tuklib_exit(); /// declared noreturn. static void lzma_attribute((__noreturn__)) help(void) { printf( "Usage: %s [OPTION]... [FILE]...\n" -"Uncompress files in the ." TOOL_FORMAT " format to the standard output.\n" +"Decompress files in the ." TOOL_FORMAT " format to standard output.\n" "\n" -" -c, --stdout (ignored)\n" -" -d, --decompress (ignored)\n" -" -k, --keep (ignored)\n" +" -d, --decompress (ignored, only decompression is supported)\n" +" -k, --keep (ignored, files are never deleted)\n" +" -c, --stdout (ignored, output is always written to standard output)\n" " -q, --quiet specify *twice* to suppress errors\n" -" -Q, --no-warn (ignored)\n" +" -Q, --no-warn (ignored, the exit status 2 is never used)\n" " -h, --help display this help and exit\n" " -V, --version display the version number and exit\n" "\n" "With no FILE, or when FILE is -, read standard input.\n" "\n" "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n" PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname); tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); } /// Prints the tool and liblzma version strings to stdout and then calls /// tuklib_exit(); declared noreturn. static void lzma_attribute((__noreturn__)) version(void) { printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n" "liblzma %s\n", lzma_version_string());
tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); } /// Parses command line options. -c, -d, -k, and -Q are accepted and /// ignored; each -q decrements display_errors until it reaches zero; /// anything that reaches the default branch exits with EXIT_FAILURE. /// NOTE(review): short_opts accepts -M with an argument but no case /// handles it, so it ends up in the default branch -- confirm intended. static void parse_options(int argc, char **argv) { static const char short_opts[] = "cdkM:hqQV"; static const struct option long_opts[] = { { "stdout", no_argument, NULL, 'c' }, { "to-stdout", no_argument, NULL, 'c' }, { "decompress", no_argument, NULL, 'd' }, { "uncompress", no_argument, NULL, 'd' }, { "keep", no_argument, NULL, 'k' }, { "quiet", no_argument, NULL, 'q' }, { "no-warn", no_argument, NULL, 'Q' }, { "help", no_argument, NULL, 'h' }, { "version", no_argument, NULL, 'V' }, { NULL, 0, NULL, 0 } }; int c; while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) != -1) { switch (c) { case 'c': case 'd': case 'k': case 'Q': break; case 'q': if (display_errors > 0) --display_errors; break; // help() and version() are declared noreturn, so control does not // fall through to the following case. case 'h': help(); case 'V': version(); default: exit(EXIT_FAILURE); } } return; } /// Decodes the contents of file with liblzma and writes the decoded data /// to stdout. filename is used only in error messages. Any read, write, /// or decoder error prints a message with my_errorf() and calls /// exit(EXIT_FAILURE); the function returns only after the decoder has /// reported LZMA_STREAM_END (and, when built as lzmadec, no trailing /// data follows the stream). static void uncompress(lzma_stream *strm, FILE *file, const char *filename) { lzma_ret ret; // Initialize the decoder #ifdef LZMADEC ret = lzma_alone_decoder(strm, UINT64_MAX); #else ret = lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED); #endif // The only reasonable error here is LZMA_MEM_ERROR. if (ret != LZMA_OK) { my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM) : "Internal error (bug)"); exit(EXIT_FAILURE); } // Input and output buffers uint8_t in_buf[BUFSIZ]; uint8_t out_buf[BUFSIZ]; strm->avail_in = 0; strm->next_out = out_buf; strm->avail_out = BUFSIZ; lzma_action action = LZMA_RUN; while (true) { if (strm->avail_in == 0) { strm->next_in = in_buf; strm->avail_in = fread(in_buf, 1, BUFSIZ, file); if (ferror(file)) { // POSIX says that fread() sets errno if // an error occurred. ferror() doesn't // touch errno. my_errorf("%s: Error reading input file: %s", filename, strerror(errno)); exit(EXIT_FAILURE); } #ifndef LZMADEC // When using LZMA_CONCATENATED, we need to tell // liblzma when it has got all the input.
if (feof(file)) action = LZMA_FINISH; #endif } ret = lzma_code(strm, action); // Write and check write error before checking decoder error. // This way as much data as possible gets written to output // even if decoder detected an error. if (strm->avail_out == 0 || ret != LZMA_OK) { const size_t write_size = BUFSIZ - strm->avail_out; if (fwrite(out_buf, 1, write_size, stdout) != write_size) { // Wouldn't be a surprise if writing to stderr // would fail too but at least try to show an // error message. my_errorf("Cannot write to standard output: " "%s", strerror(errno)); exit(EXIT_FAILURE); } strm->next_out = out_buf; strm->avail_out = BUFSIZ; } if (ret != LZMA_OK) { if (ret == LZMA_STREAM_END) { #ifdef LZMADEC // Check that there's no trailing garbage. if (strm->avail_in != 0 || fread(in_buf, 1, 1, file) != 0 || !feof(file)) ret = LZMA_DATA_ERROR; else return; #else // lzma_stream_decoder() already guarantees // that there's no trailing garbage. assert(strm->avail_in == 0); assert(action == LZMA_FINISH); assert(feof(file)); return; #endif } const char *msg; switch (ret) { case LZMA_MEM_ERROR: msg = strerror(ENOMEM); break; case LZMA_FORMAT_ERROR: msg = "File format not recognized"; break; case LZMA_OPTIONS_ERROR: // FIXME: Better message? msg = "Unsupported compression options"; break; case LZMA_DATA_ERROR: msg = "File is corrupt"; break; case LZMA_BUF_ERROR: msg = "Unexpected end of input"; break; default: msg = "Internal error (bug)"; break; } my_errorf("%s: %s", filename, msg); exit(EXIT_FAILURE); } } } /// Entry point: parses the options, then decodes stdin (when no file /// arguments are given or an argument is "-") or each named file in /// command line order, and finally calls tuklib_exit(). int main(int argc, char **argv) { // Initialize progname which will be used in error messages. tuklib_progname_init(argv); // Parse the command line options. parse_options(argc, argv); // The same lzma_stream is used for all files that we decode. This way // we don't need to reallocate memory for every file if they use the same // compression settings. lzma_stream strm = LZMA_STREAM_INIT; // Some systems require setting stdin and stdout to binary mode.
#ifdef TUKLIB_DOSLIKE setmode(fileno(stdin), O_BINARY); setmode(fileno(stdout), O_BINARY); #endif if (optind == argc) { // No filenames given, decode from stdin. uncompress(&strm, stdin, "(stdin)"); } else { // Loop through the filenames given on the command line. do { // "-" indicates stdin. if (strcmp(argv[optind], "-") == 0) { uncompress(&strm, stdin, "(stdin)"); } else { FILE *file = fopen(argv[optind], "rb"); if (file == NULL) { my_errorf("%s: %s", argv[optind], strerror(errno)); exit(EXIT_FAILURE); } uncompress(&strm, file, argv[optind]); fclose(file); } } while (++optind < argc); } #ifndef NDEBUG // Free the memory only when debugging. Freeing wastes some time, // but allows detecting possible memory leaks with Valgrind. lzma_end(&strm); #endif tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); }