Index: sbin/dumpon/dumpon.8 =================================================================== --- sbin/dumpon/dumpon.8 +++ sbin/dumpon/dumpon.8 @@ -28,7 +28,7 @@ .\" From: @(#)swapon.8 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd October 24, 2017 +.Dd November 14, 2017 .Dt DUMPON 8 .Os .Sh NAME @@ -39,6 +39,7 @@ .Op Fl v .Op Fl k Ar public_key_file .Op Fl z +.Op Fl Z .Ar special_file .Nm .Op Fl v @@ -131,6 +132,22 @@ kernel option. .Pp The +.Fl Z +option configures the kernel to compress the dump in zstd format before writing +it to the dump device. +This reduces the amount of space required for the dump and accelerates +recovery with +.Xr savecore 8 +since less data needs to be copied from the dump device. +When compression is enabled, the +.Nm +utility will not verify that the dump device is sufficiently large for a full +dump. +This flag requires a kernel compiled with the +.Dv ZSTDIO +kernel option. +.Pp +The .Fl l flag causes .Nm Index: sbin/dumpon/dumpon.c =================================================================== --- sbin/dumpon/dumpon.c +++ sbin/dumpon/dumpon.c @@ -71,7 +71,7 @@ usage(void) { fprintf(stderr, "%s\n%s\n%s\n", - "usage: dumpon [-v] [-k public_key_file] [-z] special_file", + "usage: dumpon [-v] [-k public_key_file] [-zZ] special_file", " dumpon [-v] off", " dumpon [-v] -l"); exit(EX_USAGE); @@ -190,12 +190,12 @@ int ch; int i, fd; int do_listdumpdev = 0; - bool enable, gzip; + bool enable, gzip, zstd; - gzip = false; + gzip = zstd = false; pubkeyfile = NULL; - while ((ch = getopt(argc, argv, "k:lvz")) != -1) + while ((ch = getopt(argc, argv, "k:lvzZ")) != -1) switch((char)ch) { case 'k': pubkeyfile = optarg; @@ -209,6 +209,9 @@ case 'z': gzip = true; break; + case 'Z': + zstd = true; + break; default: usage(); } @@ -252,7 +255,7 @@ if (fd < 0) err(EX_OSFILE, "%s", dumpdev); - if (!gzip) + if (!gzip && !zstd) check_size(fd, dumpdev); bzero(&kda, sizeof(kda)); @@ -266,8 +269,11 @@ #endif kda.kda_enable = 1; - kda.kda_compression = gzip 
? KERNELDUMP_COMP_GZIP : - KERNELDUMP_COMP_NONE; + kda.kda_compression = KERNELDUMP_COMP_NONE; + if (zstd) + kda.kda_compression = KERNELDUMP_COMP_ZSTD; + else if (gzip) + kda.kda_compression = KERNELDUMP_COMP_GZIP; i = ioctl(fd, DIOCSKERNELDUMP, &kda); explicit_bzero(kda.kda_encryptedkey, kda.kda_encryptedkeysize); free(kda.kda_encryptedkey); Index: sbin/savecore/savecore.c =================================================================== --- sbin/savecore/savecore.c +++ sbin/savecore/savecore.c @@ -107,6 +107,7 @@ uint64_t dumplen; time_t t; const char *stat_str; + const char *comp_str; xo_flush_h(xo); xo_emit_h(xo, "{Lwc:Dump header from device}{:dump_device/%s}\n", @@ -121,9 +122,12 @@ (long long)dumplen); xo_emit_h(xo, "{P: }{Lwc:Blocksize}{:blocksize/%d}\n", dtoh32(h->blocksize)); - xo_emit_h(xo, "{P: }{Lwc:Compression}{:compression/%s}\n", - h->compression == KERNELDUMP_COMP_GZIP ? - "gzip" : "none"); + comp_str = "none"; + if (h->compression == KERNELDUMP_COMP_GZIP) + comp_str = "gzip"; + else if (h->compression == KERNELDUMP_COMP_ZSTD) + comp_str = "zstd"; + xo_emit_h(xo, "{P: }{Lwc:Compression}{:compression/%s}\n", comp_str); t = dtoh64(h->dumptime); xo_emit_h(xo, "{P: }{Lwc:Dumptime}{:dumptime/%s}", ctime(&t)); @@ -613,6 +617,7 @@ case KERNELDUMP_COMP_NONE: break; case KERNELDUMP_COMP_GZIP: + case KERNELDUMP_COMP_ZSTD: if (compress && verbose) printf("dump is already compressed\n"); compress = false; @@ -741,7 +746,8 @@ (isencrypted ? "vmcore_encrypted" : "vmcore"), bounds); fp = zopen(corename, "w"); } else if (iscompressed && !isencrypted) { - snprintf(corename, sizeof(corename), "vmcore.%d.gz", bounds); + snprintf(corename, sizeof(corename), "vmcore.%d.%s", bounds, + (kdhl.compression == KERNELDUMP_COMP_GZIP) ? 
"gz" : "zst"); fp = fopen(corename, "w"); } else { snprintf(corename, sizeof(corename), "%s.%d", @@ -843,9 +849,10 @@ } } if (compress || iscompressed) { - snprintf(linkname, sizeof(linkname), "%s.last.gz", + snprintf(linkname, sizeof(linkname), "%s.last.%s", istextdump ? "textdump.tar" : - (isencrypted ? "vmcore_encrypted" : "vmcore")); + (isencrypted ? "vmcore_encrypted" : "vmcore"), + (kdhl.compression == KERNELDUMP_COMP_ZSTD) ? "zst" : "gz"); } else { snprintf(linkname, sizeof(linkname), "%s.last", istextdump ? "textdump.tar" : Index: share/man/man5/core.5 =================================================================== --- share/man/man5/core.5 +++ share/man/man5/core.5 @@ -28,7 +28,7 @@ .\" @(#)core.5 8.3 (Berkeley) 12/11/93 .\" $FreeBSD$ .\" -.Dd October 5, 2015 +.Dd November 14, 2017 .Dt CORE 5 .Os .Sh NAME @@ -101,23 +101,30 @@ .Va kern.sugid_coredump to 1. .Pp -Corefiles can be compressed by the kernel if the following item -is included in the kernel configuration file: +Corefiles can be compressed by the kernel if at least one of the following +items is included in the kernel configuration file: .Bl -tag -width "1234567890" -compact -offset "12345" .It options GZIO +.It options +ZSTDIO .El .Pp -When the GZIO option is included, the following sysctls control whether core -files will be compressed: -.Bl -tag -width "kern.compress_user_cores_gzlevel" -compact -offset "12345" +When the GZIO or ZSTDIO option is included, the following sysctls control +whether core files will be compressed: +.Bl -tag -width "kern.compress_user_cores_zstdlevel" -compact -offset "12345" .It Em kern.compress_user_cores_gzlevel Gzip compression level. Defaults to 6. +.It Em kern.compress_user_cores_zstdlevel +Zstd compression level. +Defaults to 6. .It Em kern.compress_user_cores Actually compress user cores. Compressed core files will have a suffix of .Ql .gz +or +.Ql .zst appended to them. 
.El .Sh NOTES Index: sys/amd64/conf/GENERIC =================================================================== --- sys/amd64/conf/GENERIC +++ sys/amd64/conf/GENERIC @@ -81,6 +81,7 @@ options RACCT # Resource accounting framework options RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default options RCTL # Resource limits +options ZSTDIO # Enable Zstd-compressed core dumps # Debugging support. Always need this: options KDB # Enable kernel debugger support. Index: sys/conf/NOTES =================================================================== --- sys/conf/NOTES +++ sys/conf/NOTES @@ -3018,6 +3018,10 @@ # This enables support for compressed core dumps. options GZIO +# zstd I/O stream support +# This enables support for Zstd compressed core dumps. +options ZSTDIO + # BHND(4) drivers options BHND_LOGLEVEL # Logging threshold level Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -3816,6 +3816,7 @@ kern/kern_umtx.c standard kern/kern_uuid.c standard kern/kern_xxx.c standard +kern/kern_zstdio.c optional zstdio kern/link_elf.c standard kern/linker_if.m standard kern/md4c.c optional netsmb Index: sys/conf/options =================================================================== --- sys/conf/options +++ sys/conf/options @@ -220,6 +220,7 @@ UMTX_PROFILING UMTX_CHAINS opt_global.h VERBOSE_SYSINIT +ZSTDIO opt_zstdio.h # POSIX kernel options P1003_1B_MQUEUE opt_posix.h Index: sys/kern/imgact_elf.c =================================================================== --- sys/kern/imgact_elf.c +++ sys/kern/imgact_elf.c @@ -35,6 +35,7 @@ #include "opt_capsicum.h" #include "opt_compat.h" #include "opt_gzio.h" +#include "opt_zstdio.h" #include #include @@ -71,6 +72,7 @@ #include #include #include +#include #include #include @@ -1184,6 +1186,7 @@ struct thread *td; struct vnode *vp; struct gzio_stream *gzs; + struct zstdio_stream *gzsts; }; static void cb_put_phdr(vm_map_entry_t, void *); @@ 
-1217,6 +1220,9 @@ static void note_procstat_umask(void *, struct sbuf *, size_t *); static void note_procstat_vmmap(void *, struct sbuf *, size_t *); +#if defined(GZIO) || defined(ZSTDIO) +extern bool coredump_zstd; +#endif #ifdef GZIO extern int compress_user_cores_gzlevel; @@ -1257,6 +1263,46 @@ } #endif /* GZIO */ +#ifdef ZSTDIO +extern int compress_user_cores_zstdlevel; + +/* + * Write out a core segment to the compression stream. + */ +static int +compress_chunk_zstd(struct coredump_params *p, char *base, char *buf, u_int len) +{ + u_int chunk_len; + int error; + + while (len > 0) { + chunk_len = MIN(len, CORE_BUF_SIZE); + + /* + * We can get EFAULT error here. + * In that case zero out the current chunk of the segment. + */ + error = copyin(base, buf, chunk_len); + if (error != 0) + bzero(buf, chunk_len); + error = zstdio_write(p->gzsts, buf, chunk_len); + if (error != 0) + break; + base += chunk_len; + len -= chunk_len; + } + return (error); +} + +static int +core_zstd_write(void *base, size_t len, off_t offset, void *arg) +{ + + return (core_write((struct coredump_params *)arg, base, len, offset, + UIO_SYSSPACE)); +} +#endif /* ZSTDIO */ + static int core_write(struct coredump_params *p, const void *base, size_t len, off_t offset, enum uio_seg seg) @@ -1276,6 +1322,10 @@ #ifdef GZIO if (p->gzs != NULL) return (compress_chunk(p, base, tmpbuf, len)); +#endif +#ifdef ZSTDIO + if (p->gzsts != NULL) + return (compress_chunk_zstd(p, base, tmpbuf, len)); #endif /* * EFAULT is a non-fatal error that we can get, for example, @@ -1325,6 +1375,11 @@ if (p->gzs != NULL) error = gzio_write(p->gzs, __DECONST(char *, data), len); else +#endif +#ifdef ZSTDIO + if (p->gzsts != NULL) + error = zstdio_write(p->gzsts, __DECONST(char *, data), len); + else #endif error = core_write(p, __DECONST(void *, data), len, p->offset, UIO_SYSSPACE); @@ -1360,8 +1415,8 @@ struct note_info *ninfo; void *hdr, *tmpbuf; size_t hdrsize, notesz, coresize; -#ifdef GZIO - boolean_t compress; 
+#if defined(GZIO) || defined(ZSTDIO) + bool compress, use_zstd = coredump_zstd; compress = (flags & IMGACT_CORE_COMPRESS) != 0; #endif @@ -1407,15 +1462,31 @@ goto done; } -#ifdef GZIO +#if defined(GZIO) || defined(ZSTDIO) /* Create a compression stream if necessary. */ if (compress) { - params.gzs = gzio_init(core_gz_write, GZIO_DEFLATE, - CORE_BUF_SIZE, compress_user_cores_gzlevel, &params); - if (params.gzs == NULL) { - error = EFAULT; - goto done; +#ifdef ZSTDIO + if (use_zstd) { + params.gzsts = zstdio_init(core_zstd_write, + CORE_BUF_SIZE, compress_user_cores_zstdlevel, + false, &params); + if (params.gzsts == NULL) { + error = EFAULT; + goto done; + } } +#endif +#ifdef GZIO + if (!use_zstd) { + params.gzs = gzio_init(core_gz_write, GZIO_DEFLATE, + CORE_BUF_SIZE, compress_user_cores_gzlevel, + &params); + if (params.gzs == NULL) { + error = EFAULT; + goto done; + } + } +#endif tmpbuf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO); } #endif @@ -1444,9 +1515,17 @@ offset += php->p_filesz; php++; } +#if defined(GZIO) || defined(ZSTDIO) + if (error == 0 && compress) { #ifdef GZIO - if (error == 0 && compress) - error = gzio_flush(params.gzs); + if (!use_zstd) + error = gzio_flush(params.gzs); +#endif +#ifdef ZSTDIO + if (use_zstd) + error = zstdio_flush(params.gzsts); +#endif + } #endif } if (error) { @@ -1456,11 +1535,17 @@ } done: -#ifdef GZIO +#if defined(GZIO) || defined(ZSTDIO) if (compress) { free(tmpbuf, M_TEMP); +#ifdef GZIO if (params.gzs != NULL) gzio_fini(params.gzs); +#endif +#ifdef ZSTDIO + if (params.gzsts != NULL) + zstdio_fini(params.gzsts); +#endif } #endif while ((ninfo = TAILQ_FIRST(&notelst)) != NULL) { Index: sys/kern/kern_shutdown.c =================================================================== --- sys/kern/kern_shutdown.c +++ sys/kern/kern_shutdown.c @@ -44,6 +44,7 @@ #include "opt_panic.h" #include "opt_sched.h" #include "opt_watchdog.h" +#include "opt_zstdio.h" #include #include @@ -73,6 +74,7 @@ #include #include #include 
+#include #include 
#include @@ -190,6 +192,24 @@ "Kernel crash dump gzip compression level"); #endif /* GZIO */ +#ifdef ZSTDIO +struct kerneldumpzstd { + struct zstdio_stream *kdzst_stream; + uint8_t *kdzst_buf; + size_t kdzst_resid; +}; + +static struct kerneldumpzstd *kerneldumpzstd_create(struct dumperinfo *di, + uint8_t compression); +static void kerneldumpzstd_destroy(struct dumperinfo *di); +static int kerneldumpzstd_write_cb(void *cb, size_t len, off_t off, void *arg); + +static int kerneldump_zstdlevel = 6; +SYSCTL_INT(_kern, OID_AUTO, kerneldump_zstdlevel, CTLFLAG_RWTUN, + &kerneldump_zstdlevel, 0, + "Kernel crash dump zstd compression level"); +#endif /* ZSTDIO */ + /* * Variable panicstr contains argument to first call to panic; used as flag * to indicate that the kernel has already called panic. @@ -1018,6 +1038,40 @@ } #endif /* GZIO */ +#ifdef ZSTDIO +static struct kerneldumpzstd * +kerneldumpzstd_create(struct dumperinfo *di, uint8_t compression) +{ + struct kerneldumpzstd *kdzstd; + + if (compression != KERNELDUMP_COMP_ZSTD) + return (NULL); + kdzstd = malloc(sizeof(*kdzstd), M_DUMPER, M_WAITOK | M_ZERO); + kdzstd->kdzst_stream = zstdio_init(kerneldumpzstd_write_cb, + di->maxiosize, kerneldump_zstdlevel, true, di); + if (kdzstd->kdzst_stream == NULL) { + free(kdzstd, M_DUMPER); + return (NULL); + } + kdzstd->kdzst_buf = malloc(di->maxiosize, M_DUMPER, M_WAITOK | M_NODUMP); + return (kdzstd); +} + +static void +kerneldumpzstd_destroy(struct dumperinfo *di) +{ + struct kerneldumpzstd *kdzst; + + kdzst = di->kdzst; + if (kdzst == NULL) + return; + zstdio_fini(kdzst->kdzst_stream); + explicit_bzero(kdzst->kdzst_buf, di->maxiosize); + free(kdzst->kdzst_buf, M_DUMPER); + free(kdzst, M_DUMPER); +} +#endif /* ZSTDIO */ + /* Registration of dumpers */ int set_dumper(struct dumperinfo *di, const char *devname, struct thread *td, @@ -1041,6 +1095,7 @@ dumper.blockbuf = NULL; dumper.kdc = NULL; dumper.kdgz = NULL; + dumper.kdzst = NULL; if (encryption != KERNELDUMP_ENC_NONE) { 
#ifdef EKCD @@ -1062,7 +1117,7 @@ devname, dumpdevname); } - if (compression != KERNELDUMP_COMP_NONE) { + if (compression == KERNELDUMP_COMP_GZIP) { #ifdef GZIO /* * We currently can't support simultaneous encryption and @@ -1080,6 +1135,25 @@ #else error = EOPNOTSUPP; goto cleanup; +#endif + } else if (compression == KERNELDUMP_COMP_ZSTD) { +#ifdef ZSTDIO + /* + * We currently can't support simultaneous encryption and + * compression. + */ + if (encryption != KERNELDUMP_ENC_NONE) { + error = EOPNOTSUPP; + goto cleanup; + } + dumper.kdzst = kerneldumpzstd_create(&dumper, compression); + if (dumper.kdzst == NULL) { + error = EINVAL; + goto cleanup; + } +#else + error = EOPNOTSUPP; + goto cleanup; #endif } @@ -1097,6 +1171,9 @@ #ifdef GZIO kerneldumpgz_destroy(&dumper); #endif +#ifdef ZSTDIO + kerneldumpzstd_destroy(&dumper); +#endif if (dumper.blockbuf != NULL) { explicit_bzero(dumper.blockbuf, dumper.blocksize); @@ -1232,6 +1309,38 @@ } #endif /* GZIO */ +#ifdef ZSTDIO +static int +kerneldumpzstd_write_cb(void *base, size_t length, off_t offset, void *arg) +{ + struct dumperinfo *di; + size_t resid, rlength; + int error; + + di = arg; + + if (length % di->blocksize != 0) { + /* + * This must be the final write after flushing the compression + * stream. Write as many full blocks as possible and stash the + * residual data in the dumper's block buffer. It will be + * padded and written in dump_finish(). + */ + rlength = rounddown(length, di->blocksize); + if (rlength != 0) { + error = _dump_append(di, base, 0, rlength); + if (error != 0) + return (error); + } + resid = length - rlength; + memmove(di->blockbuf, (uint8_t *)base + rlength, resid); + di->kdzst->kdzst_resid = resid; + return (EAGAIN); + } + return (_dump_append(di, base, 0, length)); +} +#endif /* ZSTDIO */ + /* * Write a kerneldumpheader at the specified offset. The header structure is 512 * bytes in size, but we must pad to the device sector size. 
@@ -1313,6 +1422,13 @@ 2 * di->blocksize - keysize; kdh->dumpextent = htod64(dumpextent); } else +#endif +#ifdef ZSTDIO + if (di->kdzst != NULL) { + dumpextent = di->mediasize - SIZEOF_METADATA - + 2 * di->blocksize - keysize; + kdh->dumpextent = htod64(dumpextent); + } else #endif return (E2BIG); } @@ -1362,6 +1478,18 @@ memmove(buf, virtual, length); return (gzio_write(di->kdgz->kdgz_stream, buf, length)); } +#endif +#ifdef ZSTDIO + if (di->kdzst != NULL) { + void *zbuf; + + /* Bounce through a buffer to avoid CRC errors. */ + if (length > di->maxiosize) + return (EINVAL); + zbuf = di->kdzst->kdzst_buf; + memmove(zbuf, virtual, length); + return (zstdio_write(di->kdzst->kdzst_stream, zbuf, length)); + } #endif return (_dump_append(di, virtual, physical, length)); } @@ -1427,6 +1555,31 @@ gzio_reset(di->kdgz->kdgz_stream); } #endif +#ifdef ZSTDIO + if (di->kdzst != NULL) { + error = zstdio_flush(di->kdzst->kdzst_stream); + if (error == EAGAIN) { + /* We have residual data in di->blockbuf. */ + error = dump_write(di, di->blockbuf, 0, di->dumpoff, + di->blocksize); + di->dumpoff += di->kdzst->kdzst_resid; + di->kdzst->kdzst_resid = 0; + } + if (error != 0) + return (error); + + /* + * We now know the size of the compressed dump, so update the + * header accordingly and recompute parity. + */ + kdh->dumplength = htod64(di->dumpoff - + (di->mediaoffset + di->mediasize - di->blocksize - extent)); + kdh->parity = 0; + kdh->parity = kerneldump_parity(kdh); + + zstdio_reset(di->kdzst->kdzst_stream); + } +#endif /* * Write kerneldump headers at the beginning and end of the dump extent. 
@@ -1483,6 +1636,10 @@ #ifdef GZIO if (di->kdgz != NULL) kdh->compression = KERNELDUMP_COMP_GZIP; +#endif +#ifdef ZSTDIO + if (di->kdzst != NULL) + kdh->compression = KERNELDUMP_COMP_ZSTD; #endif kdh->parity = kerneldump_parity(kdh); } Index: sys/kern/kern_sig.c =================================================================== --- sys/kern/kern_sig.c +++ sys/kern/kern_sig.c @@ -40,6 +40,7 @@ #include "opt_compat.h" #include "opt_gzio.h" #include "opt_ktrace.h" +#include "opt_zstdio.h" #include #include @@ -3252,17 +3253,36 @@ 0, sizeof(int), sysctl_debug_num_cores_check, "I", ""); #define GZ_SUFFIX ".gz" +#define ZST_SUFFIX ".zst" -#ifdef GZIO +#if defined(GZIO) || defined(ZSTDIO) static int compress_user_cores = 1; SYSCTL_INT(_kern, OID_AUTO, compress_user_cores, CTLFLAG_RWTUN, &compress_user_cores, 0, "Compression of user corefiles"); +#else +static int compress_user_cores = 0; +#endif +#ifdef GZIO int compress_user_cores_gzlevel = 6; SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_gzlevel, CTLFLAG_RWTUN, &compress_user_cores_gzlevel, 0, "Corefile gzip compression level"); -#else -static int compress_user_cores = 0; +#endif +#ifdef ZSTDIO +int compress_user_cores_zstdlevel = 6; +SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_zstdlevel, CTLFLAG_RWTUN, + &compress_user_cores_zstdlevel, 0, "Corefile Zstd compression level"); +#endif + +#if defined(ZSTDIO) +bool coredump_zstd = 1; +#elif defined(GZIO) +bool coredump_zstd = 0; +#endif +#if defined(GZIO) && defined(ZSTDIO) +SYSCTL_BOOL(_kern, OID_AUTO, compress_user_cores_zstd, CTLFLAG_RWTUN, + &coredump_zstd, 0, + "Enable to use Zstd instead of gzip for userspace core dumps"); #endif /* @@ -3361,8 +3381,14 @@ } sx_sunlock(&corefilename_lock); free(hostname, M_TEMP); - if (compress) - sbuf_printf(&sb, GZ_SUFFIX); + if (compress) { +#if defined(GZIO) || defined(ZSTDIO) + if (coredump_zstd) + sbuf_printf(&sb, ZST_SUFFIX); + else + sbuf_printf(&sb, GZ_SUFFIX); +#endif + } if (sbuf_error(&sb) != 0) { log(LOG_ERR, "pid 
%ld (%s), uid (%lu): corename is too " "long\n", (long)pid, comm, (u_long)uid); Index: sys/kern/kern_zstdio.c =================================================================== --- /dev/null +++ sys/kern/kern_zstdio.c @@ -0,0 +1,276 @@ +/*- + * Copyright (c) 2014 Mark Johnston + * Copyright (c) 2017 Conrad Meyer + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include +#include +#include +#include + +#define ZSTD_STATIC_LINKING_ONLY +#include + +MALLOC_DEFINE(M_ZSTDIO, "zstdio", "zstd state"); + +struct zstdio_stream { + ZSTD_CCtx *zst_stream; + ZSTD_inBuffer zst_inbuffer; + ZSTD_outBuffer zst_outbuffer; + uint8_t * zst_buffer; /* output buffer */ + size_t zst_maxiosz; /* Max output IO size */ + off_t zst_off; /* offset into the output stream */ + zstdio_cb zst_cb; /* output callback */ + void * zst_arg; /* private callback arg */ + void * zst_static_wkspc; +}; + +static void *zst_alloc(void *, size_t); +static void zst_free(void *, void *); + +struct zstdio_stream * +zstdio_init(zstdio_cb cb, size_t maxiosizeout, int level, bool staticctx, + void *arg) +{ + ZSTD_CCtx *dump_compressor; + ZSTD_customMem cmem = { .customAlloc = zst_alloc, .customFree = zst_free }; + struct zstdio_stream *s; + void *wkspc, *owkspc, *buffer; + size_t wkspc_size, res, buf_size; + + if (staticctx) { + wkspc_size = ZSTD_estimateCStreamSize(level); + owkspc = wkspc = malloc(wkspc_size + 8, M_ZSTDIO, + M_WAITOK | M_NODUMP); + /* Zstd API requires 8-byte alignment. 
*/ + wkspc = (void *)roundup2((uintptr_t)wkspc, 8); + + dump_compressor = ZSTD_initStaticCCtx(wkspc, wkspc_size); + if (dump_compressor == NULL) { + printf("%s: workspace too small.\n", __func__); + free(owkspc, M_ZSTDIO); + return (NULL); + } + } else { + dump_compressor = ZSTD_createCCtx_advanced(cmem); + if (dump_compressor == NULL) + return (NULL); + res = ZSTD_initCStream(dump_compressor, level); + if (ZSTD_isError(res)) { + printf("%s: ZSTD_initCStream: %s\n", __func__, + ZSTD_getErrorName(res)); + ZSTD_freeCCtx(dump_compressor); + return (NULL); + } + } + + buf_size = ZSTD_CStreamOutSize() * 2; + buffer = malloc(buf_size, M_ZSTDIO, M_WAITOK | M_NODUMP); + + s = malloc(sizeof(*s), M_ZSTDIO, M_NODUMP | M_WAITOK); + s->zst_buffer = buffer; + s->zst_outbuffer.dst = buffer; + s->zst_outbuffer.size = buf_size; + s->zst_maxiosz = maxiosizeout; + s->zst_cb = cb; + s->zst_arg = arg; + s->zst_stream = dump_compressor; + if (staticctx) + s->zst_static_wkspc = owkspc; + else + s->zst_static_wkspc = NULL; + + zstdio_reset(s); + + return (s); +} + +void +zstdio_reset(struct zstdio_stream *s) +{ + size_t res; + + res = ZSTD_resetCStream(s->zst_stream, 0); + if (ZSTD_isError(res)) { + printf("%s: could not reset stream: %s\n", __func__, + ZSTD_getErrorName(res)); + return; + } + + s->zst_off = 0; + s->zst_inbuffer.src = NULL; + s->zst_inbuffer.size = 0; + s->zst_inbuffer.pos = 0; + s->zst_outbuffer.pos = 0; +} + +static int +zst_flush_intermediate(struct zstdio_stream *s) +{ + size_t bytes_to_dump; + int error; + + /* Flush as many full output blocks as possible. 
*/ + /* XXX: 4096 is arbitrary safe HDD block size for kernel dumps */ + while (s->zst_outbuffer.pos >= 4096) { + bytes_to_dump = rounddown(s->zst_outbuffer.pos, 4096); + + if (bytes_to_dump > s->zst_maxiosz) + bytes_to_dump = s->zst_maxiosz; + + error = s->zst_cb(s->zst_buffer, bytes_to_dump, s->zst_off, + s->zst_arg); + if (error != 0) + return (error); + + /* Shift any non-full blocks up to the front of the output buffer */ + s->zst_outbuffer.pos -= bytes_to_dump; + memmove(s->zst_outbuffer.dst, + (char *)s->zst_outbuffer.dst + bytes_to_dump, + s->zst_outbuffer.pos); + s->zst_off += bytes_to_dump; + } + return (0); +} + +int +zstdio_write(struct zstdio_stream *s, void *data, size_t len) +{ + size_t lastpos, rc; + int error; + + s->zst_inbuffer.src = data; + s->zst_inbuffer.size = len; + s->zst_inbuffer.pos = 0; + lastpos = 0; + + while (s->zst_inbuffer.pos < s->zst_inbuffer.size) { + rc = ZSTD_compressStream(s->zst_stream, &s->zst_outbuffer, + &s->zst_inbuffer); + if (ZSTD_isError(rc)) { + printf("%s: Compress failed on %p! (%s)\n", + __func__, data, ZSTD_getErrorName(rc)); + return (EIO); + } + + if (lastpos == s->zst_inbuffer.pos) { + /* + * XXX: May need flushStream to make forward progress + */ + printf("ZSTD: did not make forward progress @pos %zu\n", + lastpos); + return (EIO); + } + lastpos = s->zst_inbuffer.pos; + + error = zst_flush_intermediate(s); + if (error != 0) + return (error); + } + return (0); +} + +int +zstdio_flush(struct zstdio_stream *s) +{ + size_t rc, lastpos; + int error; + + /* + * Positive return indicates unflushed data remaining; need to call + * endStream again after clearing out room in output buffer. 
+ */ + rc = 1; + lastpos = s->zst_outbuffer.pos; + while (rc > 0) { + rc = ZSTD_endStream(s->zst_stream, &s->zst_outbuffer); + if (ZSTD_isError(rc)) { + printf("%s: ZSTD_endStream failed (%s)\n", __func__, + ZSTD_getErrorName(rc)); + return (EIO); + } + if (lastpos == s->zst_outbuffer.pos) { + printf("%s: did not make forward progress endStream %zu\n", + __func__, lastpos); + return (EIO); + } + + error = zst_flush_intermediate(s); + if (error != 0) + return (error); + + lastpos = s->zst_outbuffer.pos; + } + + /* + * We've already done an intermediate flush, so all full blocks have + * been written. Only a partial block remains. Padding happens in a + * higher layer. + */ + if (s->zst_outbuffer.pos != 0) { + error = s->zst_cb(s->zst_buffer, s->zst_outbuffer.pos, + s->zst_off, s->zst_arg); + if (error != 0) + return (error); + } + + return (0); +} + +void +zstdio_fini(struct zstdio_stream *s) +{ + + if (s->zst_static_wkspc != NULL) + free(s->zst_static_wkspc, M_ZSTDIO); + else + ZSTD_freeCCtx(s->zst_stream); + free(s->zst_buffer, M_ZSTDIO); + free(s, M_ZSTDIO); +} + +static void * +zst_alloc(void *arg __unused, size_t sz) +{ + + /* + * Memory for Zstd state is allocated using M_NODUMP since it may be + * used to compress a kernel dump, and we don't want zstd to attempt to + * compress its own state. + */ + return (malloc(sz, M_ZSTDIO, M_WAITOK | M_ZERO | M_NODUMP)); +} + +static void +zst_free(void *arg __unused, void *ptr) +{ + + free(ptr, M_ZSTDIO); +} Index: sys/sys/conf.h =================================================================== --- sys/sys/conf.h +++ sys/sys/conf.h @@ -339,6 +339,7 @@ off_t dumpoff; /* Offset of ongoing kernel dump. */ struct kerneldumpcrypto *kdc; /* Kernel dump crypto. */ struct kerneldumpgz *kdgz; /* Kernel dump compression. */ + struct kerneldumpzstd *kdzst; /* Kernel dump compression. 
*/ }; extern int dumping; /* system is dumping */ Index: sys/sys/kerneldump.h =================================================================== --- sys/sys/kerneldump.h +++ sys/sys/kerneldump.h @@ -57,6 +57,7 @@ #define KERNELDUMP_COMP_NONE 0 #define KERNELDUMP_COMP_GZIP 1 +#define KERNELDUMP_COMP_ZSTD 2 #define KERNELDUMP_ENC_NONE 0 #define KERNELDUMP_ENC_AES_256_CBC 1 Index: sys/sys/zstdio.h =================================================================== --- /dev/null +++ sys/sys/zstdio.h @@ -0,0 +1,46 @@ +/*- + * Copyright (c) 2014 Mark Johnston + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _SYS__ZSTDIO_H_ +#define _SYS__ZSTDIO_H_ + +#ifdef _KERNEL + +typedef int (*zstdio_cb)(void *, size_t, off_t, void *); + +struct zstdio_stream; + +struct zstdio_stream *zstdio_init(zstdio_cb cb, size_t, int, bool, void *); +void zstdio_reset(struct zstdio_stream *); +int zstdio_write(struct zstdio_stream *, void *, size_t); +int zstdio_flush(struct zstdio_stream *); +void zstdio_fini(struct zstdio_stream *); + +#endif /* _KERNEL */ + +#endif /* _SYS__ZSTDIO_H_ */