diff --git a/include/sys/blake3.h b/include/sys/blake3.h index e6650372ccda..b3391c5f2349 100644 --- a/include/sys/blake3.h +++ b/include/sys/blake3.h @@ -1,120 +1,125 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor * Copyright (c) 2021 Tino Reichardt */ #ifndef BLAKE3_H #define BLAKE3_H #ifdef _KERNEL #include #else #include #include #endif #ifdef __cplusplus extern "C" { #endif #define BLAKE3_KEY_LEN 32 #define BLAKE3_OUT_LEN 32 #define BLAKE3_MAX_DEPTH 54 #define BLAKE3_BLOCK_LEN 64 #define BLAKE3_CHUNK_LEN 1024 /* * This struct is a private implementation detail. * It has to be here because it's part of BLAKE3_CTX below. */ typedef struct { uint32_t cv[8]; uint64_t chunk_counter; uint8_t buf[BLAKE3_BLOCK_LEN]; uint8_t buf_len; uint8_t blocks_compressed; uint8_t flags; } blake3_chunk_state_t; typedef struct { uint32_t key[8]; blake3_chunk_state_t chunk; uint8_t cv_stack_len; /* * The stack size is MAX_DEPTH + 1 because we do lazy merging. For * example, with 7 chunks, we have 3 entries in the stack. 
Adding an * 8th chunk requires a 4th entry, rather than merging everything down * to 1, because we don't know whether more input is coming. This is * different from how the reference implementation does things. */ uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN]; /* const blake3_impl_ops_t *ops */ const void *ops; } BLAKE3_CTX; /* init the context for hash operation */ void Blake3_Init(BLAKE3_CTX *ctx); /* init the context for a MAC and/or tree hash operation */ void Blake3_InitKeyed(BLAKE3_CTX *ctx, const uint8_t key[BLAKE3_KEY_LEN]); /* process the input bytes */ void Blake3_Update(BLAKE3_CTX *ctx, const void *input, size_t input_len); /* finalize the hash computation and output the result */ void Blake3_Final(const BLAKE3_CTX *ctx, uint8_t *out); /* finalize the hash computation and output the result */ void Blake3_FinalSeek(const BLAKE3_CTX *ctx, uint64_t seek, uint8_t *out, size_t out_len); +/* these are pre-allocated contexts */ +extern void **blake3_per_cpu_ctx; +extern void blake3_per_cpu_ctx_init(void); +extern void blake3_per_cpu_ctx_fini(void); + /* return number of supported implementations */ extern int blake3_get_impl_count(void); /* return id of selected implementation */ extern int blake3_get_impl_id(void); /* return name of selected implementation */ extern const char *blake3_get_impl_name(void); /* setup id as fastest implementation */ extern void blake3_set_impl_fastest(uint32_t id); /* set implementation by id */ extern void blake3_set_impl_id(uint32_t id); /* set implementation by name */ extern int blake3_set_impl_name(const char *name); /* set startup implementation */ extern void blake3_setup_impl(void); #ifdef __cplusplus } #endif #endif /* BLAKE3_H */ diff --git a/module/icp/algs/blake3/blake3_impl.c b/module/icp/algs/blake3/blake3_impl.c index c3268ec13dad..c3809a2827be 100644 --- a/module/icp/algs/blake3/blake3_impl.c +++ b/module/icp/algs/blake3/blake3_impl.c @@ -1,256 +1,284 @@ /* * CDDL HEADER START * * The contents of this 
file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2021-2022 Tino Reichardt */ #include #include #include "blake3_impl.h" static const blake3_impl_ops_t *const blake3_impls[] = { &blake3_generic_impl, #if defined(__aarch64__) || \ (defined(__x86_64) && defined(HAVE_SSE2)) || \ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) &blake3_sse2_impl, #endif #if defined(__aarch64__) || \ (defined(__x86_64) && defined(HAVE_SSE4_1)) || \ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) &blake3_sse41_impl, #endif #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2) &blake3_avx2_impl, #endif #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) &blake3_avx512_impl, #endif }; /* this pointer holds current ops for implementation */ static const blake3_impl_ops_t *blake3_selected_impl = &blake3_generic_impl; /* special implementation selections */ #define IMPL_FASTEST (UINT32_MAX) #define IMPL_CYCLE (UINT32_MAX-1) #define IMPL_USER (UINT32_MAX-2) #define IMPL_PARAM (UINT32_MAX-3) #define IMPL_READ(i) (*(volatile uint32_t *) &(i)) static uint32_t icp_blake3_impl = IMPL_FASTEST; #define BLAKE3_IMPL_NAME_MAX 16 /* id of fastest implementation */ static uint32_t blake3_fastest_id = 0; /* currently used id */ static uint32_t 
blake3_current_id = 0; /* id of module parameter (-1 == unused) */ static int blake3_param_id = -1; /* return number of supported implementations */ int blake3_get_impl_count(void) { static int impls = 0; int i; if (impls) return (impls); for (i = 0; i < ARRAY_SIZE(blake3_impls); i++) { if (!blake3_impls[i]->is_supported()) continue; impls++; } return (impls); } /* return id of selected implementation */ int blake3_get_impl_id(void) { return (blake3_current_id); } /* return name of selected implementation */ const char * blake3_get_impl_name(void) { return (blake3_selected_impl->name); } /* setup id as fastest implementation */ void blake3_set_impl_fastest(uint32_t id) { blake3_fastest_id = id; } /* set implementation by id */ void blake3_set_impl_id(uint32_t id) { int i, cid; /* select fastest */ if (id == IMPL_FASTEST) id = blake3_fastest_id; /* select next or first */ if (id == IMPL_CYCLE) id = (++blake3_current_id) % blake3_get_impl_count(); /* 0..N for the real impl */ for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) { if (!blake3_impls[i]->is_supported()) continue; if (cid == id) { blake3_current_id = cid; blake3_selected_impl = blake3_impls[i]; return; } cid++; } } /* set implementation by name */ int blake3_set_impl_name(const char *name) { int i, cid; if (strcmp(name, "fastest") == 0) { atomic_swap_32(&icp_blake3_impl, IMPL_FASTEST); blake3_set_impl_id(IMPL_FASTEST); return (0); } else if (strcmp(name, "cycle") == 0) { atomic_swap_32(&icp_blake3_impl, IMPL_CYCLE); blake3_set_impl_id(IMPL_CYCLE); return (0); } for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) { if (!blake3_impls[i]->is_supported()) continue; if (strcmp(name, blake3_impls[i]->name) == 0) { if (icp_blake3_impl == IMPL_PARAM) { blake3_param_id = cid; return (0); } blake3_selected_impl = blake3_impls[i]; blake3_current_id = cid; return (0); } cid++; } return (-EINVAL); } /* setup implementation */ void blake3_setup_impl(void) { switch (IMPL_READ(icp_blake3_impl)) { case IMPL_PARAM: 
blake3_set_impl_id(blake3_param_id); atomic_swap_32(&icp_blake3_impl, IMPL_USER); break; case IMPL_FASTEST: blake3_set_impl_id(IMPL_FASTEST); break; case IMPL_CYCLE: blake3_set_impl_id(IMPL_CYCLE); break; default: blake3_set_impl_id(blake3_current_id); break; } } /* return selected implementation */ const blake3_impl_ops_t * blake3_impl_get_ops(void) { /* each call to ops will cycle */ if (icp_blake3_impl == IMPL_CYCLE) blake3_set_impl_id(IMPL_CYCLE); return (blake3_selected_impl); } +#if defined(_KERNEL) +void **blake3_per_cpu_ctx; + +void +blake3_per_cpu_ctx_init(void) +{ + /* + * Create "The Godfather" ptr to hold all blake3 ctx + */ + blake3_per_cpu_ctx = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP); + for (int i = 0; i < max_ncpus; i++) { + blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX), + KM_SLEEP); + } +} + +void +blake3_per_cpu_ctx_fini(void) +{ + for (int i = 0; i < max_ncpus; i++) { + memset(blake3_per_cpu_ctx[i], 0, sizeof (BLAKE3_CTX)); + kmem_free(blake3_per_cpu_ctx[i], sizeof (BLAKE3_CTX)); + } + memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *)); + kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *)); +} +#endif + #if defined(_KERNEL) && defined(__linux__) static int icp_blake3_impl_set(const char *name, zfs_kernel_param_t *kp) { char req_name[BLAKE3_IMPL_NAME_MAX]; size_t i; /* sanitize input */ i = strnlen(name, BLAKE3_IMPL_NAME_MAX); if (i == 0 || i >= BLAKE3_IMPL_NAME_MAX) return (-EINVAL); strlcpy(req_name, name, BLAKE3_IMPL_NAME_MAX); while (i > 0 && isspace(req_name[i-1])) i--; req_name[i] = '\0'; atomic_swap_32(&icp_blake3_impl, IMPL_PARAM); return (blake3_set_impl_name(req_name)); } static int icp_blake3_impl_get(char *buffer, zfs_kernel_param_t *kp) { int i, cid, cnt = 0; char *fmt; /* cycling */ fmt = (icp_blake3_impl == IMPL_CYCLE) ? "[cycle] " : "cycle "; cnt += sprintf(buffer + cnt, fmt); /* fastest one */ fmt = (icp_blake3_impl == IMPL_FASTEST) ? 
"[fastest] " : "fastest "; cnt += sprintf(buffer + cnt, fmt); /* user selected */ for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) { if (!blake3_impls[i]->is_supported()) continue; fmt = (icp_blake3_impl == IMPL_USER && cid == blake3_current_id) ? "[%s] " : "%s "; cnt += sprintf(buffer + cnt, fmt, blake3_impls[i]->name); cid++; } buffer[cnt] = 0; return (cnt); } module_param_call(icp_blake3_impl, icp_blake3_impl_set, icp_blake3_impl_get, NULL, 0644); MODULE_PARM_DESC(icp_blake3_impl, "Select BLAKE3 implementation."); #endif diff --git a/module/zfs/blake3_zfs.c b/module/zfs/blake3_zfs.c index 51c455fe7237..7560f30fd4e4 100644 --- a/module/zfs/blake3_zfs.c +++ b/module/zfs/blake3_zfs.c @@ -1,113 +1,117 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2022 Tino Reichardt */ #include #include #include #include static int blake3_incremental(void *buf, size_t size, void *arg) { BLAKE3_CTX *ctx = arg; Blake3_Update(ctx, buf, size); return (0); } /* * Computes a native 256-bit BLAKE3 MAC checksum. Please note that this * function requires the presence of a ctx_template that should be allocated * using abd_checksum_blake3_tmpl_init. 
*/
 void
 abd_checksum_blake3_native(abd_t *abd, uint64_t size, const void *ctx_template,
     zio_cksum_t *zcp)
 {
-	BLAKE3_CTX *ctx;
-
-	ctx = kmem_alloc(sizeof (*ctx), KM_NOSLEEP);
-	ASSERT(ctx != 0);
 	ASSERT(ctx_template != 0);
 
+#if defined(_KERNEL)
+	/*
+	 * The pre-allocated per-CPU context is shared by every caller that
+	 * runs on this CPU. Keep preemption disabled from the slot lookup
+	 * until Blake3_Final() has written the digest, so that no other
+	 * thread can be scheduled on this CPU (and this thread cannot move
+	 * to another one) while the context is in use. The CPU index is
+	 * read only after preemption is off, so it stays valid for the
+	 * whole computation.
+	 */
+	kpreempt_disable();
+	BLAKE3_CTX *ctx = blake3_per_cpu_ctx[CPU_SEQID_UNSTABLE];
+#else
+	/* userland has no per-CPU contexts: use a temporary one */
+	BLAKE3_CTX *ctx = kmem_alloc(sizeof (*ctx), KM_SLEEP);
+#endif
+
 	memcpy(ctx, ctx_template, sizeof (*ctx));
 	(void) abd_iterate_func(abd, 0, size, blake3_incremental, ctx);
 	Blake3_Final(ctx, (uint8_t *)zcp);
 
+#if defined(_KERNEL)
+	/* digest is stored in zcp, the per-CPU slot may be reused now */
+	kpreempt_enable();
+#else
 	memset(ctx, 0, sizeof (*ctx));
 	kmem_free(ctx, sizeof (*ctx));
+#endif
 }
 
 /*
  * Byteswapped version of abd_checksum_blake3_native. This just invokes
  * the native checksum function and byteswaps the resulting checksum (since
  * BLAKE3 is internally endian-insensitive).
  */
 void
 abd_checksum_blake3_byteswap(abd_t *abd, uint64_t size,
     const void *ctx_template, zio_cksum_t *zcp)
 {
 	zio_cksum_t tmp;
 
 	ASSERT(ctx_template != 0);
 
 	abd_checksum_blake3_native(abd, size, ctx_template, &tmp);
 	zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]);
 	zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]);
 	zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]);
 	zcp->zc_word[3] = BSWAP_64(tmp.zc_word[3]);
 }
 
 /*
  * Allocates a BLAKE3 MAC template suitable for using in BLAKE3 MAC checksum
  * computations and returns a pointer to it.
  */
 void *
 abd_checksum_blake3_tmpl_init(const zio_cksum_salt_t *salt)
 {
 	BLAKE3_CTX *ctx;
 
 	ASSERT(sizeof (salt->zcs_bytes) == 32);
 
 	/* init reference object */
 	ctx = kmem_zalloc(sizeof (*ctx), KM_SLEEP);
 	Blake3_InitKeyed(ctx, salt->zcs_bytes);
 
 	return (ctx);
 }
 
 /*
  * Frees a BLAKE3 context template previously allocated using
  * zio_checksum_blake3_tmpl_init.
*/ void abd_checksum_blake3_tmpl_free(void *ctx_template) { BLAKE3_CTX *ctx = ctx_template; memset(ctx, 0, sizeof (*ctx)); kmem_free(ctx, sizeof (*ctx)); } diff --git a/module/zfs/zfs_chksum.c b/module/zfs/zfs_chksum.c index 3ebe08541b0b..639784287d72 100644 --- a/module/zfs/zfs_chksum.c +++ b/module/zfs/zfs_chksum.c @@ -1,316 +1,323 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2021 Tino Reichardt */ #include #include #include #include #include #include static kstat_t *chksum_kstat = NULL; typedef struct { const char *name; const char *impl; uint64_t bs1k; uint64_t bs4k; uint64_t bs16k; uint64_t bs64k; uint64_t bs256k; uint64_t bs1m; uint64_t bs4m; zio_cksum_salt_t salt; zio_checksum_t *(func); zio_checksum_tmpl_init_t *(init); zio_checksum_tmpl_free_t *(free); } chksum_stat_t; static int chksum_stat_cnt = 0; static chksum_stat_t *chksum_stat_data = 0; /* * i3-1005G1 test output: * * implementation 1k 4k 16k 64k 256k 1m 4m * fletcher-4 5421 15001 26468 32555 34720 32801 18847 * edonr-generic 1196 1602 1761 1749 1762 1759 1751 * skein-generic 546 591 608 615 619 612 616 * sha256-generic 246 270 274 274 277 275 276 * sha256-avx 262 296 304 307 307 307 306 * sha256-sha-ni 769 1072 1172 1220 1219 1232 1228 
* sha256-openssl 240 300 316 314 304 285 276 * sha512-generic 333 374 385 392 391 393 392 * sha512-openssl 353 441 467 476 472 467 426 * sha512-avx 362 444 473 475 479 476 478 * sha512-avx2 394 500 530 538 543 545 542 * blake3-generic 308 313 313 313 312 313 312 * blake3-sse2 402 1289 1423 1446 1432 1458 1413 * blake3-sse41 427 1470 1625 1704 1679 1607 1629 * blake3-avx2 428 1920 3095 3343 3356 3318 3204 * blake3-avx512 473 2687 4905 5836 5844 5643 5374 */ static int chksum_stat_kstat_headers(char *buf, size_t size) { ssize_t off = 0; off += snprintf(buf + off, size, "%-23s", "implementation"); off += snprintf(buf + off, size - off, "%8s", "1k"); off += snprintf(buf + off, size - off, "%8s", "4k"); off += snprintf(buf + off, size - off, "%8s", "16k"); off += snprintf(buf + off, size - off, "%8s", "64k"); off += snprintf(buf + off, size - off, "%8s", "256k"); off += snprintf(buf + off, size - off, "%8s", "1m"); (void) snprintf(buf + off, size - off, "%8s\n", "4m"); return (0); } static int chksum_stat_kstat_data(char *buf, size_t size, void *data) { chksum_stat_t *cs; ssize_t off = 0; char b[24]; cs = (chksum_stat_t *)data; snprintf(b, 23, "%s-%s", cs->name, cs->impl); off += snprintf(buf + off, size - off, "%-23s", b); off += snprintf(buf + off, size - off, "%8llu", (u_longlong_t)cs->bs1k); off += snprintf(buf + off, size - off, "%8llu", (u_longlong_t)cs->bs4k); off += snprintf(buf + off, size - off, "%8llu", (u_longlong_t)cs->bs16k); off += snprintf(buf + off, size - off, "%8llu", (u_longlong_t)cs->bs64k); off += snprintf(buf + off, size - off, "%8llu", (u_longlong_t)cs->bs256k); off += snprintf(buf + off, size - off, "%8llu", (u_longlong_t)cs->bs1m); (void) snprintf(buf + off, size - off, "%8llu\n", (u_longlong_t)cs->bs4m); return (0); } static void * chksum_stat_kstat_addr(kstat_t *ksp, loff_t n) { if (n < chksum_stat_cnt) ksp->ks_private = (void *)(chksum_stat_data + n); else ksp->ks_private = NULL; return (ksp->ks_private); } static void 
chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round, uint64_t *result) { hrtime_t start; uint64_t run_bw, run_time_ns, run_count = 0, size = 0; uint32_t l, loops = 0; zio_cksum_t zcp; switch (round) { case 1: /* 1k */ size = 1<<10; loops = 128; break; case 2: /* 2k */ size = 1<<12; loops = 64; break; case 3: /* 4k */ size = 1<<14; loops = 32; break; case 4: /* 16k */ size = 1<<16; loops = 16; break; case 5: /* 256k */ size = 1<<18; loops = 8; break; case 6: /* 1m */ size = 1<<20; loops = 4; break; case 7: /* 4m */ size = 1<<22; loops = 1; break; } kpreempt_disable(); start = gethrtime(); do { for (l = 0; l < loops; l++, run_count++) cs->func(abd, size, ctx, &zcp); run_time_ns = gethrtime() - start; } while (run_time_ns < MSEC2NSEC(1)); kpreempt_enable(); run_bw = size * run_count * NANOSEC; run_bw /= run_time_ns; /* B/s */ *result = run_bw/1024/1024; /* MiB/s */ } static void chksum_benchit(chksum_stat_t *cs) { abd_t *abd; void *ctx = 0; void *salt = &cs->salt.zcs_bytes; /* allocate test memory via default abd interface */ abd = abd_alloc_linear(1<<22, B_FALSE); memset(salt, 0, sizeof (cs->salt.zcs_bytes)); if (cs->init) { ctx = cs->init(&cs->salt); } chksum_run(cs, abd, ctx, 1, &cs->bs1k); chksum_run(cs, abd, ctx, 2, &cs->bs4k); chksum_run(cs, abd, ctx, 3, &cs->bs16k); chksum_run(cs, abd, ctx, 4, &cs->bs64k); chksum_run(cs, abd, ctx, 5, &cs->bs256k); chksum_run(cs, abd, ctx, 6, &cs->bs1m); chksum_run(cs, abd, ctx, 7, &cs->bs4m); /* free up temp memory */ if (cs->free) { cs->free(ctx); } abd_free(abd); } /* * Initialize and benchmark all supported implementations. 
*/ static void chksum_benchmark(void) { #ifndef _KERNEL /* we need the benchmark only for the kernel module */ return; #endif chksum_stat_t *cs; int cbid = 0, id; uint64_t max = 0; /* space for the benchmark times */ chksum_stat_cnt = 4; chksum_stat_cnt += blake3_get_impl_count(); chksum_stat_data = (chksum_stat_t *)kmem_zalloc( sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP); /* edonr */ cs = &chksum_stat_data[cbid++]; cs->init = abd_checksum_edonr_tmpl_init; cs->func = abd_checksum_edonr_native; cs->free = abd_checksum_edonr_tmpl_free; cs->name = "edonr"; cs->impl = "generic"; chksum_benchit(cs); /* skein */ cs = &chksum_stat_data[cbid++]; cs->init = abd_checksum_skein_tmpl_init; cs->func = abd_checksum_skein_native; cs->free = abd_checksum_skein_tmpl_free; cs->name = "skein"; cs->impl = "generic"; chksum_benchit(cs); /* sha256 */ cs = &chksum_stat_data[cbid++]; cs->init = 0; cs->func = abd_checksum_SHA256; cs->free = 0; cs->name = "sha256"; cs->impl = "generic"; chksum_benchit(cs); /* sha512 */ cs = &chksum_stat_data[cbid++]; cs->init = 0; cs->func = abd_checksum_SHA512_native; cs->free = 0; cs->name = "sha512"; cs->impl = "generic"; chksum_benchit(cs); /* blake3 */ for (id = 0; id < blake3_get_impl_count(); id++) { blake3_set_impl_id(id); cs = &chksum_stat_data[cbid++]; cs->init = abd_checksum_blake3_tmpl_init; cs->func = abd_checksum_blake3_native; cs->free = abd_checksum_blake3_tmpl_free; cs->name = "blake3"; cs->impl = blake3_get_impl_name(); chksum_benchit(cs); if (cs->bs256k > max) { max = cs->bs256k; blake3_set_impl_fastest(id); } } } void chksum_init(void) { +#ifdef _KERNEL + blake3_per_cpu_ctx_init(); +#endif /* Benchmark supported implementations */ chksum_benchmark(); /* Install kstats for all implementations */ chksum_kstat = kstat_create("zfs", 0, "chksum_bench", "misc", KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); if (chksum_kstat != NULL) { chksum_kstat->ks_data = NULL; chksum_kstat->ks_ndata = UINT32_MAX; kstat_set_raw_ops(chksum_kstat, 
chksum_stat_kstat_headers, chksum_stat_kstat_data, chksum_stat_kstat_addr); kstat_install(chksum_kstat); } /* setup implementations */ blake3_setup_impl(); } void chksum_fini(void) { if (chksum_kstat != NULL) { kstat_delete(chksum_kstat); chksum_kstat = NULL; } if (chksum_stat_cnt) { kmem_free(chksum_stat_data, sizeof (chksum_stat_t) * chksum_stat_cnt); chksum_stat_cnt = 0; chksum_stat_data = 0; } + +#ifdef _KERNEL + blake3_per_cpu_ctx_fini(); +#endif }