Changeset View
Standalone View
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zstd.c
- This file was added.
Property | Old Value | New Value |
---|---|---|
svn:eol-style | null | native \ No newline at end of property |
svn:keywords | null | FreeBSD=%H \ No newline at end of property |
svn:mime-type | null | text/plain \ No newline at end of property |
/* | |||||
* CDDL HEADER START | |||||
* | |||||
* The contents of this file are subject to the terms of the | |||||
* Common Development and Distribution License (the "License"). | |||||
* You may not use this file except in compliance with the License. | |||||
* | |||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |||||
* or http://www.opensolaris.org/os/licensing. | |||||
* See the License for the specific language governing permissions | |||||
* and limitations under the License. | |||||
* | |||||
* When distributing Covered Code, include this CDDL HEADER in each | |||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |||||
* If applicable, add the following below this CDDL HEADER, with the | |||||
* fields enclosed by brackets "[]" replaced with your own identifying | |||||
* information: Portions Copyright [yyyy] [name of copyright owner] | |||||
* | |||||
* CDDL HEADER END | |||||
*/ | |||||
/* | |||||
* Copyright (c) 2016-2018 by Klara Systems Inc. | |||||
* Copyright (c) 2016-2018 Allan Jude <allanjude@freebsd.org>. | |||||
*/ | |||||
#include <sys/param.h> | |||||
#include <sys/zfs_context.h> | |||||
#include <sys/zio_compress.h> | |||||
#include <sys/spa.h> | |||||
#include <sys/malloc.h> | |||||
#define ZSTD_STATIC_LINKING_ONLY | |||||
#include <zstd.h> | |||||
#include <zstd_errors.h> | |||||
#define ZSTD_KMEM_MAGIC 0x20160831 | |||||
#define ZSTD_COOKIE_SHIFT 26 | |||||
#define ZSTD_COOKIE_MASK ((1U<<ZSTD_COOKIE_SHIFT)-1) | |||||
/* | |||||
* XXX: TODO: Investigate using ZSTD_compressBlock for small blocks | |||||
*/ | |||||
static size_t real_zstd_compress(const char *source, char *dest, int isize, | |||||
int osize, int level); | |||||
static size_t real_zstd_decompress(const char *source, char *dest, int isize, | |||||
int maxosize); | |||||
void *zstd_alloc(void *opaque, size_t size); | |||||
void zstd_free(void *opaque, void *ptr); | |||||
/*
 * Custom allocator table handed to the ZSTD *_advanced() constructors so
 * that every internal ZSTD allocation is routed through
 * zstd_alloc()/zstd_free() below instead of ZSTD's default malloc/free.
 */
static const ZSTD_customMem zstd_malloc = {
	zstd_alloc,
	zstd_free,
	NULL,
};
/*
 * Identifies which kmem cache (if any) a zstd allocation was satisfied
 * from.  There is one workspace cache per (record size, compression
 * level) bucket, plus dedicated caches for the compression and
 * decompression context structures.  Order must stay in sync with the
 * zstd_cache_config[] table below.
 */
enum zstd_kmem_type {
	ZSTD_KMEM_UNKNOWN = 0,		/* not cache-backed: plain kmem_alloc() */
	ZSTD_KMEM_CCTX,			/* ZSTD compression context struct */
	ZSTD_KMEM_WRKSPC_4K_MIN,	/* 4K records, minimum level */
	ZSTD_KMEM_WRKSPC_4K_DEF,	/* 4K records, default level */
	ZSTD_KMEM_WRKSPC_4K_MAX,	/* 4K records, maximum level */
	ZSTD_KMEM_WRKSPC_16K_MIN,
	ZSTD_KMEM_WRKSPC_16K_DEF,
	ZSTD_KMEM_WRKSPC_16K_MAX,
	ZSTD_KMEM_WRKSPC_128K_MIN,
	ZSTD_KMEM_WRKSPC_128K_DEF,
	ZSTD_KMEM_WRKSPC_128K_MAX,
	/* SPA_MAXBLOCKSIZE */
	ZSTD_KMEM_WRKSPC_MBS_MIN,
	ZSTD_KMEM_WRKSPC_MBS_DEF,
	ZSTD_KMEM_WRKSPC_MBS_MAX,
	ZSTD_KMEM_DCTX,			/* ZSTD decompression context */
	ZSTD_KMEM_COUNT,
};
/*
 * Header prepended to every buffer handed out by zstd_alloc(); lets
 * zstd_free() route the buffer back to the correct kmem cache.  Entries
 * of the zstd_cache_size[] table reuse this struct as a sort/search key
 * (kmem_size ordered, see zstd_compare()).
 */
struct zstd_kmem {
	uint_t kmem_magic;		/* ZSTD_KMEM_MAGIC sanity check */
	enum zstd_kmem_type kmem_type;	/* owning cache, or UNKNOWN */
	size_t kmem_size;		/* total size including this header */
};
/*
 * Static description of one kmem cache: the (record size, compression
 * level) pair a workspace cache is sized for, and the cache's name.
 * Entries with block_size == 0 (unknown/cctx/dctx) are sized specially
 * in zstd_init().
 */
struct zstd_kmem_config {
	size_t block_size;	/* record size the workspace must cover */
	int compress_level;	/* ZSTD level used for the size estimate */
	char *cache_name;	/* kmem cache name (style(9): '*' binds right) */
};
/* One kmem cache per zstd_kmem_type; indexed by kmem_type. */
static kmem_cache_t *zstd_kmem_cache[ZSTD_KMEM_COUNT] = { NULL };
/*
 * Size table filled in by zstd_init() and then sorted by kmem_size so
 * zstd_alloc() can first-fit scan it.  NOTE: after the qsort the array
 * index no longer equals kmem_type — always go through kmem_type.
 */
static struct zstd_kmem zstd_cache_size[ZSTD_KMEM_COUNT] = {
	{ ZSTD_KMEM_MAGIC, 0, 0 } };
/*
 * Cache geometry table.  Order must match enum zstd_kmem_type exactly;
 * zstd_init() walks it by index.  block_size 0 rows (unknown, cctx,
 * dctx) get their sizes computed specially rather than via
 * ZSTD_estimateCCtxSize_usingCParams().
 */
static struct zstd_kmem_config zstd_cache_config[ZSTD_KMEM_COUNT] = {
	{ 0, 0, "zstd_unknown" },
	{ 0, 0, "zstd_cctx" },
	{ 4096, ZIO_ZSTD_LEVEL_MIN, "zstd_wrkspc_4k_min" },
	{ 4096, ZIO_ZSTD_LEVEL_DEFAULT, "zstd_wrkspc_4k_def" },
	{ 4096, ZIO_ZSTD_LEVEL_MAX, "zstd_wrkspc_4k_max" },
	{ 16384, ZIO_ZSTD_LEVEL_MIN, "zstd_wrkspc_16k_min" },
	{ 16384, ZIO_ZSTD_LEVEL_DEFAULT, "zstd_wrkspc_16k_def" },
	{ 16384, ZIO_ZSTD_LEVEL_MAX, "zstd_wrkspc_16k_max" },
	{ SPA_OLD_MAXBLOCKSIZE, ZIO_ZSTD_LEVEL_MIN, "zstd_wrkspc_128k_min" },
	{ SPA_OLD_MAXBLOCKSIZE, ZIO_ZSTD_LEVEL_DEFAULT,
	    "zstd_wrkspc_128k_def" },
	{ SPA_OLD_MAXBLOCKSIZE, ZIO_ZSTD_LEVEL_MAX, "zstd_wrkspc_128k_max" },
	{ SPA_MAXBLOCKSIZE, ZIO_ZSTD_LEVEL_MIN, "zstd_wrkspc_mbs_min" },
	{ SPA_MAXBLOCKSIZE, ZIO_ZSTD_LEVEL_DEFAULT, "zstd_wrkspc_mbs_def" },
	{ SPA_MAXBLOCKSIZE, ZIO_ZSTD_LEVEL_MAX, "zstd_wrkspc_mbs_max" },
	{ 0, 0, "zstd_dctx" },
};
static int | |||||
zstd_compare(const void *a, const void *b) | |||||
{ | |||||
struct zstd_kmem *x, *y; | |||||
x = (struct zstd_kmem*)a; | |||||
y = (struct zstd_kmem*)b; | |||||
ASSERT(x->kmem_magic == ZSTD_KMEM_MAGIC); | |||||
ASSERT(y->kmem_magic == ZSTD_KMEM_MAGIC); | |||||
if (x->kmem_size > y->kmem_size) { | |||||
return (1); | |||||
} else if (x->kmem_size == y->kmem_size) { | |||||
return (0); | |||||
} else { | |||||
return (-1); | |||||
} | |||||
} | |||||
size_t | |||||
zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) | |||||
{ | |||||
size_t c_len; | |||||
uint32_t cookie; | |||||
char *dest = d_start; | |||||
ASSERT(d_len >= sizeof (cookie)); | |||||
ASSERT(d_len <= s_len); | |||||
/* XXX: this could overflow, but we never have blocks that big */ | |||||
c_len = real_zstd_compress(s_start, &dest[sizeof (cookie)], s_len, | |||||
d_len - sizeof (cookie), n); | |||||
/* Signal an error if the compression routine returned an error. */ | |||||
if (ZSTD_isError(c_len)) | |||||
return (s_len); | |||||
/* | |||||
* Encode the compresed buffer size, and compression level (top 6 bits) | |||||
* at the start of the block. We'll need the size later in decompression | |||||
* to counter the effects of padding which might be added to the | |||||
* compressed buffer and which, if unhandled, would confuse the hell out | |||||
* of our decompression function. We may need the compression level | |||||
* if compressed_arc is disabled, to match the compression settings | |||||
* to write to the L2ARC. | |||||
*/ | |||||
cookie = c_len | (n << ZSTD_COOKIE_SHIFT); | |||||
*(uint32_t *)dest = BE_32(cookie); | |||||
return (c_len + sizeof (cookie)); | |||||
} | |||||
int | |||||
zstd_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) | |||||
{ | |||||
const char *src = s_start; | |||||
uint32_t cookie = BE_IN32(src); | |||||
uint32_t bufsiz = cookie & ZSTD_COOKIE_MASK; | |||||
ASSERT(d_len >= s_len); | |||||
/* invalid compressed buffer size encoded at start */ | |||||
if (bufsiz + sizeof (bufsiz) > s_len) | |||||
return (1); | |||||
/* | |||||
* Returns 0 on success (decompression function returned non-negative) | |||||
* and non-zero on failure (decompression function returned negative. | |||||
*/ | |||||
if (ZSTD_isError(real_zstd_decompress(&src[sizeof (bufsiz)], d_start, | |||||
bufsiz, d_len))) | |||||
return (1); | |||||
return (0); | |||||
} | |||||
int | |||||
zstd_getlevel(void *s_start, size_t s_len __unused) | |||||
{ | |||||
const char *src = s_start; | |||||
uint32_t cookie = BE_IN32(src); | |||||
int res; | |||||
yann.collet.73_gmail.com: Performance suggestion :
re-using an _existing_ compression context saves a lot of allocation… | |||||
Not Done Inline ActionsI have not worked out had to handle that safely yet. There are 2 issues: There are multiple kernel threads that will be calling this function concurrently, and they may be doing different compression levels. I don't want to have N unused -19 compression contexts outstanding, as they are up to 50MB each. So preallocating a number of contexts probably won't work because of the diversity of input block sizes and compression levels, and the multi-threaded nature of the surrounding code. ZSTD won't be invoked in its multi-threaded mode, but we may be compressing many independent blocks at once. allanjude: I have not worked out had to handle that safely yet. There are 2 issues:
There are multiple… | |||||
res = (cookie & ~SPA_COMPRESSMASK) >> ZSTD_COOKIE_SHIFT; | |||||
ASSERT(res <= ZIO_ZSTDLVL_LEVELS); | |||||
return (res); | |||||
} | |||||
static size_t | |||||
real_zstd_compress(const char *source, char *dest, int isize, int osize, | |||||
int level) | |||||
{ | |||||
size_t result; | |||||
ZSTD_CCtx *cctx; | |||||
ASSERT(level != 0); | |||||
if (level == ZIO_ZSTDLVL_DEFAULT) | |||||
level = ZIO_ZSTD_LEVEL_DEFAULT; | |||||
/* XXX: In ZSTD 1.3+ consider using ZSTD_initStaticCCtx() instead */ | |||||
cctx = ZSTD_createCCtx_advanced(zstd_malloc); | |||||
/* | |||||
Not Done Inline ActionsSame logic here as for compression side : The impact is less pronounced on the decompression side, because initialisation work is much lighter. yann.collet.73_gmail.com: Same logic here as for compression side :
re-using a context saves allocation and… | |||||
* out of kernel memory, gently fall through - this will disable | |||||
* compression in zio_compress_data | |||||
*/ | |||||
if (cctx == NULL) | |||||
return (0); | |||||
result = ZSTD_compressCCtx(cctx, dest, osize, source, isize, level); | |||||
ZSTD_freeCCtx(cctx); | |||||
return (result); | |||||
} | |||||
static size_t | |||||
real_zstd_decompress(const char *source, char *dest, int isize, int maxosize) | |||||
{ | |||||
size_t result; | |||||
ZSTD_DCtx *dctx; | |||||
dctx = ZSTD_createDCtx_advanced(zstd_malloc); | |||||
if (dctx == NULL) | |||||
return (ZSTD_error_memory_allocation); | |||||
result = ZSTD_decompressDCtx(dctx, dest, maxosize, source, isize); | |||||
ZSTD_freeDCtx(dctx); | |||||
return (result); | |||||
} | |||||
extern void * | |||||
zstd_alloc(void *opaque __unused, size_t size) | |||||
{ | |||||
size_t nbytes = sizeof(struct zstd_kmem) + size; | |||||
struct zstd_kmem *z; | |||||
enum zstd_kmem_type type; | |||||
int i; | |||||
type = ZSTD_KMEM_UNKNOWN; | |||||
for (i = 0; i < ZSTD_KMEM_COUNT; i++) { | |||||
if (nbytes <= zstd_cache_size[i].kmem_size) { | |||||
type = zstd_cache_size[i].kmem_type; | |||||
z = kmem_cache_alloc(zstd_kmem_cache[type], | |||||
KM_NOSLEEP | M_ZERO); | |||||
break; | |||||
} | |||||
} | |||||
/* No matching cache */ | |||||
if (type == ZSTD_KMEM_UNKNOWN) { | |||||
z = kmem_alloc(nbytes, KM_NOSLEEP | M_ZERO); | |||||
} | |||||
if (z == NULL) { | |||||
return (NULL); | |||||
} | |||||
z->kmem_magic = ZSTD_KMEM_MAGIC; | |||||
z->kmem_type = type; | |||||
z->kmem_size = nbytes; | |||||
return ((void*)z + (sizeof(struct zstd_kmem))); | |||||
} | |||||
extern void | |||||
zstd_free(void *opaque __unused, void *ptr) | |||||
{ | |||||
struct zstd_kmem *z = ptr - sizeof(struct zstd_kmem); | |||||
ASSERT(z->kmem_magic == ZSTD_KMEM_MAGIC); | |||||
ASSERT(z->kmem_type < ZSTD_KMEM_COUNT); | |||||
ASSERT(z->kmem_type >= ZSTD_KMEM_UNKNOWN); | |||||
if (z->kmem_type == ZSTD_KMEM_UNKNOWN) { | |||||
kmem_free(z, z->kmem_size); | |||||
} else { | |||||
kmem_cache_free(zstd_kmem_cache[z->kmem_type], z); | |||||
} | |||||
} | |||||
extern void | |||||
zstd_init(void) | |||||
{ | |||||
int i; | |||||
/* There is no estimate function for the CCtx itself */ | |||||
zstd_cache_size[1].kmem_magic = ZSTD_KMEM_MAGIC; | |||||
zstd_cache_size[1].kmem_type = 1; | |||||
zstd_cache_size[1].kmem_size = roundup2(zstd_cache_config[1].block_size | |||||
+ sizeof(struct zstd_kmem), PAGE_SIZE); | |||||
zstd_kmem_cache[1] = kmem_cache_create( | |||||
zstd_cache_config[1].cache_name, zstd_cache_size[1].kmem_size, | |||||
0, NULL, NULL, NULL, NULL, NULL, 0); | |||||
/* | |||||
* Estimate the size of the ZSTD CCtx workspace required for each record | |||||
* size at each compression level. | |||||
*/ | |||||
for (i = 2; i < ZSTD_KMEM_DCTX; i++) { | |||||
ASSERT(zstd_cache_config[i].cache_name != NULL); | |||||
zstd_cache_size[i].kmem_magic = ZSTD_KMEM_MAGIC; | |||||
zstd_cache_size[i].kmem_type = i; | |||||
zstd_cache_size[i].kmem_size = roundup2( | |||||
ZSTD_estimateCCtxSize_usingCParams( | |||||
ZSTD_getCParams(zstd_cache_config[i].compress_level, | |||||
zstd_cache_config[i].block_size, 0)) + | |||||
sizeof(struct zstd_kmem), PAGE_SIZE); | |||||
zstd_kmem_cache[i] = kmem_cache_create( | |||||
zstd_cache_config[i].cache_name, | |||||
zstd_cache_size[i].kmem_size, | |||||
0, NULL, NULL, NULL, NULL, NULL, 0); | |||||
} | |||||
/* Estimate the size of the decompression context */ | |||||
zstd_cache_size[i].kmem_magic = ZSTD_KMEM_MAGIC; | |||||
zstd_cache_size[i].kmem_type = i; | |||||
zstd_cache_size[i].kmem_size = roundup2(ZSTD_estimateDCtxSize() + | |||||
sizeof(struct zstd_kmem), PAGE_SIZE); | |||||
zstd_kmem_cache[i] = kmem_cache_create(zstd_cache_config[i].cache_name, | |||||
zstd_cache_size[i].kmem_size, 0, NULL, NULL, NULL, NULL, NULL, 0); | |||||
/* Sort the kmem caches for later searching */ | |||||
qsort(zstd_cache_size, ZSTD_KMEM_COUNT, sizeof(struct zstd_kmem), | |||||
zstd_compare); | |||||
} | |||||
extern void | |||||
zstd_fini(void) | |||||
{ | |||||
int i, type; | |||||
for (i = 0; i < ZSTD_KMEM_COUNT; i++) { | |||||
type = zstd_cache_size[i].kmem_type; | |||||
if (zstd_kmem_cache[type] != NULL) { | |||||
kmem_cache_destroy(zstd_kmem_cache[type]); | |||||
} | |||||
} | |||||
} |
Performance suggestion :
re-using an _existing_ compression context saves a lot of allocation and initialisation work.
The smaller the block size, the more measurable the savings.