Index: sbin/savecore/Makefile =================================================================== --- sbin/savecore/Makefile +++ sbin/savecore/Makefile @@ -2,7 +2,9 @@ PACKAGE=runtime PROG= savecore -LIBADD= z xo +LIBADD= z xo lzma MAN= savecore.8 +SRCS= ${PROG}.c \ + savecore_xz.c .include Index: sbin/savecore/savecore.h =================================================================== --- /dev/null +++ sbin/savecore/savecore.h @@ -0,0 +1,35 @@ +/*- + * Copyright (c) 2017 Netflix, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef SAVECORE_H +#define SAVECORE_H 1 +FILE *xzopen(const char * restrict path, const char * restrict mode, + uint32_t threads, uint32_t preset, lzma_check check, size_t ibufsz, + size_t obufsz); +#endif Index: sbin/savecore/savecore.8 =================================================================== --- sbin/savecore/savecore.8 +++ sbin/savecore/savecore.8 @@ -28,7 +28,7 @@ .\" From: @(#)savecore.8 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd December 10, 2016 +.Dd January 18, 2017 .Dt SAVECORE 8 .Os .Sh NAME @@ -45,7 +45,7 @@ .Op Ar device ... .Nm .Op Fl -libxo -.Op Fl fkvz +.Op Fl Jfkvz .Op Fl m Ar maxdumps .Op Ar directory Op Ar device ... .Sh DESCRIPTION @@ -77,6 +77,12 @@ This option is compatible only with the .Op Fl v option. +.It Fl J +Compress the core dump using the xz format (see +.Xr xz 1 ) . +The +.Nm +utility will attempt to accomplish the compression using multiple threads. .It Fl c Clear the dump, so that future invocations of .Nm @@ -165,6 +171,7 @@ .Xr rc 8 ) . .Sh SEE ALSO .Xr gzip 1 , +.Xr xz 1 , .Xr getbootfile 3 , .Xr libxo 3 , .Xr xo_parse_args 3 , Index: sbin/savecore/savecore.c =================================================================== --- sbin/savecore/savecore.c +++ sbin/savecore/savecore.c @@ -67,7 +67,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -81,8 +83,11 @@ #include #include #include +#include #include +#include "savecore.h" + /* The size of the buffer used for I/O.
*/ #define BUFFERSIZE (1024*1024) @@ -90,7 +95,7 @@ #define STATUS_GOOD 1 #define STATUS_UNKNOWN 2 -static int checkfor, compress, clear, force, keep, verbose; /* flags */ +static int checkfor, compress, clear, force, keep, verbose, xzcompress; /* flags */ static int nfound, nsaved, nerr; /* statistics */ static int maxdumps; @@ -99,6 +104,8 @@ static sig_atomic_t got_siginfo; static void infohandler(int); +static const char *fileexts[] = { "", ".gz", ".xz", NULL }; + static void printheader(xo_handle_t *xo, const struct kerneldumpheader *h, const char *device, int bounds, const int status) @@ -220,6 +227,7 @@ static off_t saved_dump_size(int bounds) { + const char **ext; static char path[PATH_MAX]; off_t dumpsize; @@ -227,47 +235,50 @@ (void)snprintf(path, sizeof(path), "info.%d", bounds); dumpsize += file_size(path); - (void)snprintf(path, sizeof(path), "vmcore.%d", bounds); - dumpsize += file_size(path); - (void)snprintf(path, sizeof(path), "vmcore.%d.gz", bounds); - dumpsize += file_size(path); - (void)snprintf(path, sizeof(path), "textdump.tar.%d", bounds); - dumpsize += file_size(path); - (void)snprintf(path, sizeof(path), "textdump.tar.%d.gz", bounds); - dumpsize += file_size(path); - + for (ext = fileexts; *ext != NULL; ext++) { + (void)snprintf(path, sizeof(path), "vmcore.%d%s", bounds, *ext); + dumpsize += file_size(path); + (void)snprintf(path, sizeof(path), "textdump.tar.%d%s", bounds, + *ext); + dumpsize += file_size(path); + } return (dumpsize); } static void saved_dump_remove(int bounds) { + const char **ext; static char path[PATH_MAX]; (void)snprintf(path, sizeof(path), "info.%d", bounds); (void)unlink(path); - (void)snprintf(path, sizeof(path), "vmcore.%d", bounds); - (void)unlink(path); - (void)snprintf(path, sizeof(path), "vmcore.%d.gz", bounds); - (void)unlink(path); - (void)snprintf(path, sizeof(path), "textdump.tar.%d", bounds); - (void)unlink(path); - (void)snprintf(path, sizeof(path), "textdump.tar.%d.gz", bounds); - (void)unlink(path); + 
for (ext = fileexts; *ext != NULL; ext++) { + (void)snprintf(path, sizeof(path), "vmcore.%d%s", bounds, *ext); + (void)unlink(path); + (void)snprintf(path, sizeof(path), "textdump.tar.%d%s", bounds, + *ext); + (void)unlink(path); + } } static void symlinks_remove(void) { + const char **ext; + char path[PATH_MAX]; (void)unlink("info.last"); (void)unlink("key.last"); - (void)unlink("vmcore.last"); - (void)unlink("vmcore.last.gz"); - (void)unlink("vmcore_encrypted.last"); - (void)unlink("vmcore_encrypted.last.gz"); - (void)unlink("textdump.tar.last"); - (void)unlink("textdump.tar.last.gz"); + for (ext = fileexts; *ext != NULL; ext++) { + (void)snprintf(path, sizeof(path), "vmcore.last%s", *ext); + (void)unlink(path); + (void)snprintf(path, sizeof(path), "vmcore_encrypted.last%s", + *ext); + (void)unlink(path); + (void)snprintf(path, sizeof(path), "textdump.tar.last%s", *ext); + (void)unlink(path); + } } /* @@ -341,7 +352,7 @@ nerr++; return (-1); } - if (compress || isencrypted) { + if (compress || xzcompress || isencrypted) { nw = fwrite(buf, 1, wl, fp); } else { for (nw = 0; nw < nr; nw = he) { @@ -465,6 +476,7 @@ static char keyname[PATH_MAX]; static char *buf = NULL; char *temp = NULL; + const char *ext; struct kerneldumpheader kdhf, kdhl; uint8_t *dumpkey; off_t mediasize, dumpsize, firsthd, lasthd; @@ -682,17 +694,23 @@ oumask = umask(S_IRWXG|S_IRWXO); /* Restrict access to the core file.*/ isencrypted = (dumpkeysize > 0); - if (compress) { - snprintf(corename, sizeof(corename), "%s.%d.gz", - istextdump ? "textdump.tar" : - (isencrypted ? "vmcore_encrypted" : "vmcore"), bounds); + if (compress) + ext = ".gz"; + else if (xzcompress) + ext = ".xz"; + else + ext = ""; + snprintf(corename, sizeof(corename), "%s.%d%s", + istextdump ? "textdump.tar" : + (isencrypted ? "vmcore_encrypted" : "vmcore"), bounds, ext); + if (compress) fp = zopen(corename, "w"); - } else { - snprintf(corename, sizeof(corename), "%s.%d", - istextdump ? "textdump.tar" : - (isencrypted ? 
"vmcore_encrypted" : "vmcore"), bounds); + else if (xzcompress) + fp = xzopen(corename, "w", istextdump ? 1 : lzma_cputhreads(), + istextdump ? 0 : 1, LZMA_CHECK_CRC64, BUFFERSIZE, + 10 * BUFFERSIZE); + else fp = fopen(corename, "w"); - } if (fp == NULL) { syslog(LOG_ERR, "%s: %m", corename); close(fdinfo); @@ -751,8 +769,8 @@ } syslog(LOG_NOTICE, "writing %s%score to %s/%s", - isencrypted ? "encrypted " : "", compress ? "compressed " : "", - savedir, corename); + isencrypted ? "encrypted " : "", + (compress || xzcompress) ? "compressed " : "", savedir, corename); if (istextdump) { if (DoTextdumpFile(fd, dumpsize, lasthd, buf, device, @@ -785,15 +803,9 @@ "key.last"); } } - if (compress) { - snprintf(linkname, sizeof(linkname), "%s.last.gz", - istextdump ? "textdump.tar" : - (isencrypted ? "vmcore_encrypted" : "vmcore")); - } else { - snprintf(linkname, sizeof(linkname), "%s.last", - istextdump ? "textdump.tar" : - (isencrypted ? "vmcore_encrypted" : "vmcore")); - } + snprintf(linkname, sizeof(linkname), "%s.last%s", + istextdump ? "textdump.tar" : + (isencrypted ? 
"vmcore_encrypted" : "vmcore"), ext); if (symlink(corename, linkname) == -1) { syslog(LOG_WARNING, "unable to create symlink %s/%s: %m", savedir, linkname); @@ -832,12 +844,24 @@ } static void +raiselimits(void) +{ + struct rlimit rl; + + rl.rlim_cur = rl.rlim_max = RLIM_INFINITY; + (void)setrlimit(RLIMIT_AS, &rl); + (void)setrlimit(RLIMIT_CPU, &rl); + (void)setrlimit(RLIMIT_DATA, &rl); + (void)setrlimit(RLIMIT_FSIZE, &rl); +} + +static void usage(void) { xo_error("%s\n%s\n%s\n", "usage: savecore -c [-v] [device ...]", " savecore -C [-v] [device ...]", - " savecore [-fkvz] [-m maxdumps] [directory [device ...]]"); + " savecore [-Jfkvz] [-m maxdumps] [directory [device ...]]"); exit(1); } @@ -848,17 +872,19 @@ struct fstab *fsp; int i, ch, error; - checkfor = compress = clear = force = keep = verbose = 0; + checkfor = compress = clear = force = keep = verbose = xzcompress = 0; nfound = nsaved = nerr = 0; openlog("savecore", LOG_PERROR, LOG_DAEMON); signal(SIGINFO, infohandler); + raiselimits(); + argc = xo_parse_args(argc, argv); if (argc < 0) exit(1); - while ((ch = getopt(argc, argv, "Ccfkm:vz")) != -1) + while ((ch = getopt(argc, argv, "CcfJkm:vz")) != -1) switch(ch) { case 'C': checkfor = 1; @@ -869,6 +895,9 @@ case 'f': force = 1; break; + case 'J': + xzcompress = 1; + break; case 'k': keep = 1; break; @@ -891,7 +920,9 @@ } if (checkfor && (clear || force || keep)) usage(); - if (clear && (compress || keep)) + if (clear && (compress || keep || xzcompress)) + usage(); + if (compress && xzcompress) usage(); if (maxdumps > 0 && (checkfor || clear)) usage(); Index: sbin/savecore/savecore_xz.c =================================================================== --- /dev/null +++ sbin/savecore/savecore_xz.c @@ -0,0 +1,956 @@ +/*- + * Copyright (c) 2017 Netflix, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The names of the authors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define LZMA_MANUAL_HEADERS 1 +#include + +#include "savecore.h" + +/* + * Here's our general philosphy: + * + * Allow this to be an (almost) drop-in replacement for a normal file stream. + * + * At the moment, we only allow writing, although it would be "easy enough" to + * allow reading, too. + * + * We also don't support seeking (which seems like an obvious enough + * restriction). + * + * As much as possible, we will try to limit copies by doing buffer exchanges + * when called to write. 
(If the standard library is trying to hand our write + * function the same buffer we allocated for it, we will simply take the buffer + * and allocate a new one for it to use next time. Otherwise, we will try to + * simply consume the buffer the standard library gives us. We only copy to + * our own buffer if absolutely necessary.) + * + * We support threading. If threading for writes, we will use the specified + * number of LZMA threads. We will also try to use the aio(4) facility to + * offload writes, but will fallback to normal synchronous writes, if needed. + */ + +#define NUM_AIO_CBS 4 +#define NUM_OBUFS (NUM_AIO_CBS + 1) + +#define DEFAULT_BUFSZ (10 * 1024 * 1024) /* 10 MB */ + +/* + * Cookie that tracks our state. + * + * For ease, aiocb's always use the same index both in the pointer list and + * in the structure. + */ +struct xz_cookie { + lzma_stream xzc_strm; /* LZMA stream */ + struct aiocb xzc_iocbs[NUM_AIO_CBS]; /* aio(4) CBs */ + struct aiocb const *xzc_iocbps[NUM_AIO_CBS];/* aio(4) CB pointers */ + void *xzc_obufs[NUM_OBUFS]; /* Output buffers */ + char *xzc_fpibuf; /* Buffer set via setvbuf */ + char *xzc_lzmaibuf; /* Buffer supplied to LZMA */ + FILE *xzc_fp; /* Back-pointer to fp */ + int (*xzc_flush)(struct xz_cookie *, bool);/* Flush function */ + size_t xzc_outbufsz; /* Output buffer size */ + size_t xzc_inbufsz; /* Input buffer size */ + int xzc_fd; /* Output fd */ + int xzc_niocbs; /* Members in xzc_iocbps */ + int xzc_iocbsbits; /* Free xzc_iocbs entries */ + int xzc_obufsbits; /* Free xzc_obufs entries */ + int xzc_curobuf; /* Current obuf index */ + int xzc_flags; /* Flags */ +#define XZ_ZEROCOPY 0x1 +}; + +/* Return a reasonable errno value for the given LZMA return value. 
*/ +static int +_xz_errno(lzma_ret lzmarv) +{ + int rv; + + switch(lzmarv) { + case LZMA_MEM_ERROR: + case LZMA_MEMLIMIT_ERROR: + rv = ENOMEM; + break; + case LZMA_PROG_ERROR: + rv = EDOOFUS; + break; + default: + rv = EINVAL; + break; + } + return (rv); +} + +/* + * Flush the output buffer to disk using normal, synchronous writes. In this + * context, block is ignored. + * Return 0 on success. On failure, return -1 and set errno. + */ +static int +_xz_sync_flush(struct xz_cookie *state, bool block __unused) +{ + uint8_t *buf; + size_t rem; + ssize_t written; + + /* Check to make sure we have something to do. */ + if (state->xzc_strm.next_out == NULL || + state->xzc_strm.avail_out == state->xzc_outbufsz) + return (0); + buf = (uint8_t *)state->xzc_obufs[state->xzc_curobuf]; + rem = state->xzc_outbufsz - state->xzc_strm.avail_out; + + while (rem) { + written = write(state->xzc_fd, buf, MIN(INT_MAX, rem)); + if (written < 0 && errno == EINTR) + written = 0; + else if (written < 0) + return (-1); + rem -= written; + buf += written; + } + + /* Reset the stream's buffer. */ + state->xzc_strm.next_out = state->xzc_obufs[state->xzc_curobuf]; + state->xzc_strm.avail_out = state->xzc_outbufsz; + return (0); +} + +/* + * Obtain a free output buffer. + * Returns a pointer to the buffer; + */ +static void * +_xz_get_obuf(struct xz_cookie *state) +{ + int i; + + i = ffs(state->xzc_obufsbits); + assert(i != 0); + i--; + state->xzc_obufsbits &= ~(1 << i); + state->xzc_curobuf = i; + return (state->xzc_obufs[i]); +} + +/* + * Mark an outbut buffer as available for use. + */ +static void +_xz_return_obuf(struct xz_cookie *state, volatile void *buf) +{ + int i; + + for (i = 0; i < NUM_OBUFS; i++) + if ((volatile void *)state->xzc_obufs[i] == buf) { + state->xzc_obufsbits |= (1 << i); + return; + } + + /* This should never happen. */ + assert(1 == 0); +} + +/* + * Flush the output buffer to disk. If block is true, don't return until + * able to flush the output buffer. 
+ * Return 0 on success. On failure, return -1 and set errno. + */ +static int +_xz_aio_flush(struct xz_cookie *state, bool block) +{ + const struct timespec zerots = {0,0}; + const struct timespec *timeout; + struct aiocb *cb; + ssize_t status; + int i, newerrno, rv; + + newerrno = rv = 0; + + /* + * Always start by checking for AIO success. If block is set, then + * suspend indefinitely; otherwise, merely poll to see if anything + * needs to be done. If there are no outstanding I/O requests, just + * skip this check. + */ + if (!state->xzc_niocbs) + goto skip_check; +again: + if (block) + timeout = NULL; + else + timeout = &zerots; + if (aio_suspend(state->xzc_iocbps, state->xzc_niocbs, timeout)) { + if (errno != EAGAIN) + warn("%s: Error calling aio_suspend", __func__); + } else { + /* Figure out which AIO is ready. */ + for (i = state->xzc_niocbs - 1; i >= 0; i--) { + if (state->xzc_iocbps[i] == NULL) { + if (i == (state->xzc_niocbs - 1)) + state->xzc_niocbs--; + continue; + } + status = aio_error(state->xzc_iocbps[i]); + if (status == EINPROGRESS) + continue; + /* + * It either succeeded or failed. Get the final status, + * free the kernel resources, and pull it off the list. + */ + status = aio_return(&state->xzc_iocbs[i]); + state->xzc_iocbps[i] = NULL; + state->xzc_iocbsbits |= (1 << i); + if (i == (state->xzc_niocbs - 1)) + state->xzc_niocbs--; + + /* + * Check the status. If it failed, update rv and + * errno. + */ + if (status < 0 && newerrno == 0) { + newerrno = errno; + rv = (-1); + } else if (status >= 0 && newerrno == 0 && + (size_t)status != state->xzc_iocbs[i].aio_nbytes) { + /* Likely cause. */ + newerrno = ENOSPC; + rv = (-1); + } + + /* Mark the output buffer as free. */ + _xz_return_obuf(state, state->xzc_iocbs[i].aio_buf); + } + } + + /* + * If we had a failure of one I/O request, just block waiting for + * the rest to complete. It is likely future requests will fail. 
+ * Once the rest of the requests complete, return the first error + * we saw. + */ + if (rv && state->xzc_niocbs) { + block = true; + goto again; + } else if (rv) + goto done; + +skip_check: + /* If there is no output to send at this time, we're done. */ + if (state->xzc_strm.next_out == NULL || + state->xzc_strm.avail_out == state->xzc_outbufsz) + goto done; + + /* + * If we have no AIO blocks left, set EAGAIN. Likewise, if we + * have no output buffers left, set EAGAIN. + */ + if (!state->xzc_iocbsbits || !state->xzc_obufsbits) { + assert(!block); + newerrno = EAGAIN; + rv = (-1); + goto done; + } + + /* + * Now, try to queue the AIO request. + */ + i = ffs(state->xzc_iocbsbits); + assert(i > 0); + i--; + + cb = &state->xzc_iocbs[i]; + memset(cb, 0, sizeof(struct aiocb)); + cb->aio_fildes = state->xzc_fd; + cb->aio_buf = state->xzc_obufs[state->xzc_curobuf]; + cb->aio_nbytes = state->xzc_outbufsz - state->xzc_strm.avail_out; + if (aio_write(cb)) { + warn("%s: error calling aio_write", __func__); + /* + * If this failed and there is no outstanding data, try + * to switch to synchronous writes. + */ + if (state->xzc_niocbs == 0 && !_xz_sync_flush(state, block)) { + fprintf(stderr, "%s: Switching to synchronous writes\n", + __func__); + state->xzc_flush = _xz_sync_flush; + newerrno = 0; + } else + rv = (-1); + goto done; + } + + /* Record this control block as in use. */ + state->xzc_iocbsbits &= ~(1 << i); + state->xzc_iocbps[i] = cb; + if (state->xzc_niocbs <= i) + state->xzc_niocbs = i + 1; + + /* Refill. */ + state->xzc_strm.next_out = _xz_get_obuf(state); + state->xzc_strm.avail_out = state->xzc_outbufsz; + +done: + errno = newerrno; + return (rv); +} + +/* + * Run the LZMA encoding. If block is true, don't return until the input + * has been completely consumed. Otherwise, set EAGAIN if we can't make + * progress due to a full output queue. + * Return 0 on success. On failure, return -1 and set errno. 
+ */ +static int +_xz_run(struct xz_cookie *state, bool finished, bool block) +{ + lzma_stream *strm; + lzma_ret lzmarv; + + strm = &state->xzc_strm; + + while (finished || strm->avail_in) { + lzmarv = lzma_code(strm, finished ? LZMA_FINISH: LZMA_RUN); + switch (lzmarv) { + case LZMA_OK: + case LZMA_STREAM_END: + case LZMA_BUF_ERROR: + break; + default: + errno = _xz_errno(lzmarv); + return (-1); + } + if (lzmarv == LZMA_STREAM_END || strm->avail_out == 0) + if ((*state->xzc_flush)(state, block)) + return (-1); + if (lzmarv == LZMA_STREAM_END) { + assert(finished); + assert(strm->avail_in == 0); + return (0); + } + } + return (0); +} + +static size_t +_xz_ibuf_avail(const struct xz_cookie *state, uint8_t **curp) +{ + union { + const uint8_t *c; + uint8_t *v; + } cur, end; + + /* First byte after the data already in the buffer. */ + cur.c = state->xzc_strm.next_in; + cur.c += state->xzc_strm.avail_in; + + /* First byte after the buffer. */ + end.c = (uint8_t *)state->xzc_lzmaibuf; + end.c += state->xzc_inbufsz; + + /* cur should be within the buffer. */ + assert(cur.v > (uint8_t *)state->xzc_lzmaibuf && cur.v <= end.v); + + /* + * Use the union game to make the compiler understand that it is safe + * to assign the 'const' value to a non-const pointer. + */ + if (curp != NULL) + *curp = cur.v; + + return (end.c - cur.c); +} + +static int +_xz_write(void *cookie, const char *buf, int len) +{ + struct xz_cookie *state; + int copylen, rv; + bool block, needcopy, zerocopy; + + state = (struct xz_cookie *)cookie; + + errno = 0; + + /* Basic argument error-checking. */ + if (len == 0) + return (0); + if (len < 0) { + errno = EINVAL; + return (-1); + } + if (buf == NULL) { + errno = EFAULT; + return (-1); + } + + zerocopy = ((state->xzc_flags & XZ_ZEROCOPY) == XZ_ZEROCOPY); + /* + * Try to consume the input stream. In theory, there should + * usually be nothing in the input stream, since we flush it each time. 
+ * However, we could end up in this situation if we have reached our + * limit of outstanding aio(4) requests. If needed, we will block here + * waiting for the input queue to be empty. + */ + block = false; + while (state->xzc_strm.avail_in) { + rv = _xz_run(state, false, block); + if (rv && errno != EAGAIN) + return (-1); + + /* + * If zero copy is set, we prioritize not copying, so we need + * to drain the remaining input. Otherwise, we can see if we + * can copy to the end of the current buffer. + */ + if (!zerocopy && _xz_ibuf_avail(state, NULL)) + break; + + /* If we loop, we want to block. */ + block = true; + } + + /* + * If we still have bytes in the input queue, see if we can copy + * to the end of the buffer. Then, we'll just return. Why spin our + * wheels more here? + */ + if (state->xzc_strm.avail_in) { + uint8_t *curp; + + assert(!zerocopy); + if ((size_t)len > _xz_ibuf_avail(state, &curp)) + len = (int)_xz_ibuf_avail(state, NULL); + assert(len > 0); + memcpy(curp, buf, len); + return (len); + } + + /* + * See if we can do a buffer exchange. If so, we shouldn't need to + * have zero copy semantics. + */ + if (buf == (const char *)state->xzc_fpibuf) + needcopy = false; + else + needcopy = true; + + /* + * See if we can run through the buffer. If so, nothing needs to be + * done. (In the case of zero copy, we *must* run through the buffer + * unless it was ours.) + */ + state->xzc_strm.avail_in = len; + state->xzc_strm.next_in = (const void *)buf; + block = false; + while (state->xzc_strm.avail_in) { + rv = _xz_run(state, false, block); + if (rv && errno != EAGAIN) + return (-1); + if (!(zerocopy && needcopy)) + break; + block = true; + } + + /* Entire buffer consumed. Return success. */ + if (state->xzc_strm.avail_in == 0) + return (len); + + /* If we were using our buffer, try a buffer exchange. */ + if (!needcopy) { + /* + * Try a buffer exchange. If this fails, we will fall back to + * doing a copy. 
+ */ + if (!setvbuf(state->xzc_fp, state->xzc_lzmaibuf, _IOFBF, + state->xzc_inbufsz)) { + void *tmp; + + assert((size_t)len <= state->xzc_inbufsz); + needcopy = false; + tmp = state->xzc_lzmaibuf; + state->xzc_lzmaibuf = state->xzc_fpibuf; + state->xzc_fpibuf = tmp; + + /* + * We're actually done at this point. Because buf was + * state->xzc_fpibuf, the buffer swap means that + * state->xzc_strm.next_in points into the + * state->xzc_lzmaibuf. + * + * So, we can merely return at this point and claim + * we consumed all of the bytes. + */ + return (len); + } + } + + /* + * At this point, figure out what we want to do. The "obvious" thing + * is to merely return the number of bytes consumed. So, let's try + * that (if, of course, any bytes actually were consumed). + */ + if ((size_t)len > state->xzc_strm.avail_in) { + /* + * Reset avail_in and next_in, since we're giving back the rest + * of the buffer for now. + */ + state->xzc_strm.next_in = NULL; + state->xzc_strm.avail_in = 0; + return ((size_t)len - state->xzc_strm.avail_in); + } + + /* + * Finally, since all else has failed, try to copy the bytes to the + * state->xzc_lzmaibuf buffer, which should now be unused. + */ + copylen = MIN(state->xzc_strm.avail_in, state->xzc_inbufsz); + memcpy(state->xzc_lzmaibuf, state->xzc_strm.next_in, copylen); + + /* + * The total amount we consumed is the amount consumed by _xz_run() + * and the amount we copied to a buffer. + */ + len -= state->xzc_strm.avail_in; + len += copylen; + + /* Reset the input stream. 
*/ + state->xzc_strm.avail_in = copylen; + state->xzc_strm.next_in = (void *)state->xzc_lzmaibuf; + + return (len); +} + +static void +_xz_cookie_cleanup(struct xz_cookie *state) +{ + int i; + + if (state->xzc_fd >= 0) + close(state->xzc_fd); + lzma_end(&state->xzc_strm); + for (i = 0; i < NUM_OBUFS; i++) + free(state->xzc_obufs[i]); + free(state->xzc_fpibuf); + free(state->xzc_lzmaibuf); + free(state); +} + +static int +_xz_close(void *cookie) +{ + struct xz_cookie *state; + int newerrno, tmp, rv; + + state = (struct xz_cookie *)cookie; + newerrno = 0; + rv = 0; + + /* Flush the buffers. */ + do { + tmp = _xz_run(state, true, true); + if (tmp && !rv) { + rv = tmp; + newerrno = errno; + } + } while (state->xzc_strm.avail_in || state->xzc_niocbs); + + _xz_cookie_cleanup(state); + errno = newerrno; + return (rv); +} + +/* + * Setup an unthreaded compression context. + * Returns 0 on success. Returns -1 and sets errno on error. + */ +static int +_xz_init_single_encoder(struct xz_cookie *state, uint32_t preset, + lzma_check check) +{ + const lzma_stream initstrm = LZMA_STREAM_INIT; + lzma_stream *strm; + lzma_ret lzmarv; + + strm = &state->xzc_strm; + *strm = initstrm; + + if ((lzmarv = lzma_easy_encoder(strm, preset, check)) != LZMA_OK) { + errno = _xz_errno(lzmarv); + return (-1); + } + + return (0); +} + +/* Determine the amount of free memory. */ +static uint64_t +_xz_get_freemem(void) +{ + uint64_t rv; + unsigned int freepages, inactive; + size_t len; + + len = sizeof(freepages); + if (sysctlbyname("vm.stats.vm.v_free_count", &freepages, &len, NULL, 0)) + return (UINT64_MAX); + if (len != sizeof(freepages)) + return (UINT64_MAX); + if (sysctlbyname("vm.stats.vm.v_inactive_count", &inactive, &len, NULL, + 0)) + return (UINT64_MAX); + if (len != sizeof(inactive)) + return (UINT64_MAX); + rv = (uint64_t)freepages * (uint64_t)getpagesize(); + rv += (uint64_t)inactive * (uint64_t)getpagesize(); + return (rv); +} + +/* + * Setup a multi-threaded compression context. 
+ * Returns 0 on success. Returns -1 and sets errno on error. + */ +static int +_xz_init_mt_encoder(struct xz_cookie *state, uint32_t threads, uint32_t preset, + lzma_check check) +{ + lzma_mt mt_options; + const lzma_stream initstrm = LZMA_STREAM_INIT; + lzma_stream *strm; + uint64_t freemem, overhead; + lzma_ret lzmarv; + + strm = &state->xzc_strm; + *strm = initstrm; + + mt_options.flags = 0; + mt_options.threads = threads; + mt_options.block_size = 0; + mt_options.timeout = 0; + mt_options.preset = preset; + mt_options.filters = NULL; + mt_options.check = check; + + /* + * Reduce the number of threads until we are under the maximum memory + * size. If we can't find such a combination, fall back to using the + * unthreaded encoder. + */ + overhead = (state->xzc_inbufsz * 2) + (state->xzc_outbufsz * NUM_OBUFS); + freemem = _xz_get_freemem(); + if (freemem > overhead) + freemem -= overhead; + else + freemem = 0; + while (lzma_stream_encoder_mt_memusage(&mt_options) > freemem) + if (--mt_options.threads == 0) + return (_xz_init_single_encoder(state, preset, check)); + + if ((lzmarv = lzma_stream_encoder_mt(strm, &mt_options)) != LZMA_OK) { + errno = _xz_errno(lzmarv); + return (-1); + } + + return (0); +} + +/* + * Determine open flags from the mode. + * (This somewhat follows the logic from __sflags() from stdio.) + * Note that we always set O_APPEND for files with write permissions. + * This simplifies the logic in the aio writes. + * + * Return 0 if everything is OK. Otherwise, return -1. 
+ */ +static int +_xzopen_flags(const char *mode, int *openflags, int *ourflags) +{ + bool done; + + switch (*mode++) { + case 'r': + *openflags = O_RDONLY; + break; + case 'w': + *openflags = O_WRONLY | O_CREAT | O_TRUNC | O_APPEND; + break; + case 'a': + *openflags = O_WRONLY | O_CREAT | O_APPEND; + break; + default: + errno = EINVAL; + return (-1); + } + done = false; + *ourflags = 0; + while (!done) + switch (*mode++) { + case 'b': + /* Ignore binary mode */ + break; + case '+': + /* We don't allow read and write. */ + errno = EINVAL; + return (-1); + case 'x': + /* This check makes sense since we don't allow R/W. */ + if (!(*openflags & O_WRONLY)) { + errno = EINVAL; + return (-1); + } + *openflags |= O_EXCL; + break; + case 'e': + *openflags |= O_CLOEXEC; + break; + case 'v': + *openflags |= O_VERIFY; + break; + case 'z': + *ourflags |= XZ_ZEROCOPY; + break; + default: + done = true; + break; + } + return (0); +} + +/* + * Setup the XZ cookie for writing. + * + * path - The path to open. + * mode - See fopen(3). This must include write privileges and cannot include + * read privileges. + * threads - Number of threads to use. 1 enables aio(4), while 0 also disables + * aio. + * preset - Compression preset level. + * check - See lzma/check.h for available checks. + * ibufsz - Size of input buffers. There are up to two of these. Choosing 0 + * allows auto-selection. + * obufsz - Size of output buffers. There are up to NUM_OBUFS of these. + * Choosing 0 allows auto-selection. + * + * Returns a pointer to the cookie on success. On failure, returns NULL and + * sets errno. + */ +static struct xz_cookie * +_xz_wcookie(const char * restrict path, const char * restrict mode, + uint32_t threads, uint32_t preset, lzma_check check, size_t ibufsz, + size_t obufsz) +{ + struct xz_cookie *state; + int flags, i, newerrno, openflags; + bool lzmainit, success; + + lzmainit = success = false; + + /* Check the flags. We require write-only privileges. 
*/ + if (_xzopen_flags(mode, &openflags, &flags)) + return (NULL); + if ((openflags & O_ACCMODE) != O_WRONLY) { + errno = EINVAL; + return (NULL); + } + + state = calloc(sizeof(struct xz_cookie), 1); + if (state == NULL) { + errno = ENOMEM; + return (NULL); + } + + if ((state->xzc_fd = open(path, openflags, DEFFILEMODE)) < 0) { + newerrno = errno; + goto done; + } + + if (!ibufsz) + ibufsz = DEFAULT_BUFSZ; + if (!obufsz) + obufsz = DEFAULT_BUFSZ; + state->xzc_outbufsz = obufsz; + state->xzc_inbufsz = ibufsz; + + state->xzc_flags = flags; + + /* Setup the LZMA context. */ + if (threads > 1) { + if (_xz_init_mt_encoder(state, threads, preset, check)) { + newerrno = errno; + goto done; + } + } else if (_xz_init_single_encoder(state, preset, check)) { + newerrno = errno; + goto done; + } + lzmainit = true; + + /* Setup input buffers.*/ + if ((state->xzc_lzmaibuf = malloc(ibufsz)) == NULL || + (state->xzc_fpibuf = malloc(ibufsz)) == NULL) { + newerrno = errno; + goto done; + } + + /* Setup output buffers. */ + if (threads > 0) { + /* + * Allocate NUM_OBUFS output buffers. As long as we get one + * buffer, we won't declare an error. (After all, it only takes + * one.) However, let's be honest that we'll probably have + * bigger problems down the road if we aren't able to allocate + * even a few buffers here. + */ + for (i = 0; i < NUM_OBUFS; i++) + if ((state->xzc_obufs[i] = malloc(obufsz)) != NULL) + state->xzc_obufsbits |= (1 << i); + if (!state->xzc_obufsbits) { + newerrno = errno; + goto done; + } + + /* Declare all AIO CBs available. */ + state->xzc_iocbsbits = (1 << NUM_AIO_CBS) - 1; + + /* Try async I/O. */ + state->xzc_flush = _xz_aio_flush; + } else { + /* Allocate a single output buffer. */ + if ((state->xzc_obufs[0] = malloc(obufsz)) == NULL) { + newerrno = errno; + goto done; + } + state->xzc_obufsbits = 1; + + /* Do synchronous I/O. 
*/ + state->xzc_flush = _xz_sync_flush; + } + + success = true; + +done: + if (!success) { + if (state->xzc_fd >= 0) + close(state->xzc_fd); + if (lzmainit) + lzma_end(&state->xzc_strm); + for (i = 0; i < NUM_OBUFS; i++) + free(state->xzc_obufs[i]); + free(state->xzc_fpibuf); + free(state->xzc_lzmaibuf); + free(state); + state = NULL; + errno = newerrno; + } else + errno = 0; + return (state); +} + +/* + * Open a stream for writing. + * + * path - The path to open. + * mode - See fopen(3). This must include write privileges and cannot include + * read privileges. + * threads - Number of threads to use. 1 enables aio(4), while 0 also disables + * aio. + * preset - Compression preset level. + * check - See lzma/check.h for available checks. + * ibufsz - Size of input buffers. There are up to two of these. Choosing 0 + * allows auto-selection. + * obufsz - Size of output buffers. There are up to NUM_OBUFS of these. + * Choosing 0 allows auto-selection. + * + * Returns a pointer to the stream on success. On failure, returns NULL and + * sets errno. + */ +FILE * +xzopen(const char * restrict path, const char * restrict mode, + uint32_t threads, uint32_t preset, lzma_check check, size_t ibufsz, + size_t obufsz) +{ + FILE *rv; + struct xz_cookie *state; + int newerrno; + + state = _xz_wcookie(path, mode, threads, preset, check, ibufsz, obufsz); + if (state == NULL) + return (NULL); + + /* Allocate the stream. */ + if ((rv = funopen(state, NULL, _xz_write, NULL, _xz_close)) == NULL) { + newerrno = errno; + goto done; + } + /* Record the back-reference to the stream. */ + state->xzc_fp = rv; + + /* Setup the first output buffer. */ + state->xzc_strm.next_out = _xz_get_obuf(state); + state->xzc_strm.avail_out = state->xzc_outbufsz; + + /* + * Tell the stream to use our input buffer. We don't check the return + * value since it isn't strictly necessary this succeed. 
+ * + * We don't do this if XZ_ZEROCOPY is set, since we generally want the + * user buffer to be passed through without a copy. + */ + if ((state->xzc_flags & XZ_ZEROCOPY) == 0) + (void)setvbuf(rv, state->xzc_fpibuf, _IOFBF, + state->xzc_inbufsz); + +done: + if (rv == NULL) { + _xz_cookie_cleanup(state); + errno = newerrno; + } else + errno = 0; + return (rv); +}