Index: head/sys/crypto/aesni/aesni.c =================================================================== --- head/sys/crypto/aesni/aesni.c (revision 362623) +++ head/sys/crypto/aesni/aesni.c (revision 362624) @@ -1,918 +1,912 @@ /*- * Copyright (c) 2005-2008 Pawel Jakub Dawidek * Copyright (c) 2010 Konstantin Belousov * Copyright (c) 2014 The FreeBSD Foundation * Copyright (c) 2017 Conrad Meyer * All rights reserved. * * Portions of this software were developed by John-Mark Gurney * under sponsorship of the FreeBSD Foundation and * Rubicon Communications, LLC (Netgate). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__i386__) #include #elif defined(__amd64__) #include #endif static struct mtx_padalign *ctx_mtx; static struct fpu_kern_ctx **ctx_fpu; struct aesni_softc { int32_t cid; bool has_aes; bool has_sha; }; #define ACQUIRE_CTX(i, ctx) \ do { \ (i) = PCPU_GET(cpuid); \ mtx_lock(&ctx_mtx[(i)]); \ (ctx) = ctx_fpu[(i)]; \ } while (0) #define RELEASE_CTX(i, ctx) \ do { \ mtx_unlock(&ctx_mtx[(i)]); \ (i) = -1; \ (ctx) = NULL; \ } while (0) static int aesni_cipher_setup(struct aesni_session *ses, const struct crypto_session_params *csp); static int aesni_cipher_process(struct aesni_session *ses, struct cryptop *crp); static int aesni_cipher_crypt(struct aesni_session *ses, struct cryptop *crp, const struct crypto_session_params *csp); static int aesni_cipher_mac(struct aesni_session *ses, struct cryptop *crp, const struct crypto_session_params *csp); MALLOC_DEFINE(M_AESNI, "aesni_data", "AESNI Data"); static void aesni_identify(driver_t *drv, device_t parent) { /* NB: order 10 is so we get attached after h/w devices */ if (device_find_child(parent, "aesni", -1) == NULL && BUS_ADD_CHILD(parent, 10, "aesni", -1) == 0) panic("aesni: could not attach"); } static void detect_cpu_features(bool *has_aes, bool *has_sha) { *has_aes = ((cpu_feature2 & CPUID2_AESNI) != 0 && (cpu_feature2 & CPUID2_SSE41) != 0); *has_sha = ((cpu_stdext_feature & CPUID_STDEXT_SHA) != 0 && (cpu_feature2 & CPUID2_SSSE3) != 0); } static int aesni_probe(device_t dev) { bool has_aes, has_sha; detect_cpu_features(&has_aes, &has_sha); if (!has_aes && !has_sha) { device_printf(dev, "No AES or SHA support.\n"); return (EINVAL); } else if (has_aes && has_sha) device_set_desc(dev, "AES-CBC,AES-CCM,AES-GCM,AES-ICM,AES-XTS,SHA1,SHA256"); else if (has_aes) device_set_desc(dev, "AES-CBC,AES-CCM,AES-GCM,AES-ICM,AES-XTS"); else device_set_desc(dev, "SHA1,SHA256"); return (0); } static void aesni_cleanctx(void) { int i; /* XXX - no way to return driverid */ CPU_FOREACH(i) { if (ctx_fpu[i] != NULL) { mtx_destroy(&ctx_mtx[i]); fpu_kern_free_ctx(ctx_fpu[i]); } ctx_fpu[i] = NULL; } free(ctx_mtx, M_AESNI); ctx_mtx = NULL; free(ctx_fpu, M_AESNI); ctx_fpu = NULL; } static int aesni_attach(device_t dev) { struct aesni_softc *sc; int i; sc = device_get_softc(dev); sc->cid = crypto_get_driverid(dev, sizeof(struct aesni_session), CRYPTOCAP_F_SOFTWARE | CRYPTOCAP_F_SYNC | CRYPTOCAP_F_ACCEL_SOFTWARE); if (sc->cid < 0) { device_printf(dev, "Could not get crypto driver id.\n"); return (ENOMEM); } ctx_mtx = malloc(sizeof *ctx_mtx * (mp_maxid + 1), M_AESNI, M_WAITOK|M_ZERO); ctx_fpu = malloc(sizeof *ctx_fpu * (mp_maxid + 1), M_AESNI, M_WAITOK|M_ZERO); CPU_FOREACH(i) { ctx_fpu[i] = fpu_kern_alloc_ctx(0); mtx_init(&ctx_mtx[i], "anifpumtx", NULL, MTX_DEF|MTX_NEW); } detect_cpu_features(&sc->has_aes, &sc->has_sha); return (0); } static int aesni_detach(device_t dev) { struct aesni_softc *sc; sc = device_get_softc(dev); crypto_unregister_all(sc->cid); aesni_cleanctx(); return (0); } static bool aesni_auth_supported(struct aesni_softc *sc, const struct crypto_session_params *csp) { if (!sc->has_sha) return (false); switch (csp->csp_auth_alg) { case CRYPTO_SHA1: case CRYPTO_SHA2_224: case CRYPTO_SHA2_256: case CRYPTO_SHA1_HMAC: case CRYPTO_SHA2_224_HMAC: case CRYPTO_SHA2_256_HMAC: break; default: return (false); } return (true); } static bool aesni_cipher_supported(struct aesni_softc *sc, const struct crypto_session_params *csp) { if (!sc->has_aes) return (false); switch (csp->csp_cipher_alg) { case CRYPTO_AES_CBC: case CRYPTO_AES_ICM: if (csp->csp_ivlen != AES_BLOCK_LEN) return (false); return (sc->has_aes); case CRYPTO_AES_XTS: if (csp->csp_ivlen != AES_XTS_IV_LEN) return (false); return (sc->has_aes); default: return (false); } } static int aesni_probesession(device_t dev, const struct crypto_session_params *csp) { struct aesni_softc *sc; sc = device_get_softc(dev); if ((csp->csp_flags & ~(CSP_F_SEPARATE_OUTPUT | CSP_F_SEPARATE_AAD)) != 0) return (EINVAL); switch (csp->csp_mode) { case CSP_MODE_DIGEST: if (!aesni_auth_supported(sc, csp)) return (EINVAL); break; case CSP_MODE_CIPHER: if (!aesni_cipher_supported(sc, csp)) return (EINVAL); break; case CSP_MODE_AEAD: switch (csp->csp_cipher_alg) { case CRYPTO_AES_NIST_GCM_16: if (csp->csp_auth_mlen != 0 && csp->csp_auth_mlen != GMAC_DIGEST_LEN) return (EINVAL); if (csp->csp_ivlen != AES_GCM_IV_LEN || !sc->has_aes) return (EINVAL); break; case CRYPTO_AES_CCM_16: if (csp->csp_auth_mlen != 0 && csp->csp_auth_mlen != AES_CBC_MAC_HASH_LEN) return (EINVAL); if (csp->csp_ivlen != AES_CCM_IV_LEN || !sc->has_aes) return (EINVAL); break; default: return (EINVAL); } break; case CSP_MODE_ETA: if (!aesni_auth_supported(sc, csp) || !aesni_cipher_supported(sc, csp)) return (EINVAL); break; default: return (EINVAL); } return (CRYPTODEV_PROBE_ACCEL_SOFTWARE); } static int aesni_newsession(device_t dev, crypto_session_t cses, const struct crypto_session_params *csp) { struct aesni_softc *sc; struct aesni_session *ses; int error; sc = device_get_softc(dev); ses = crypto_get_driver_session(cses); switch (csp->csp_mode) { case CSP_MODE_DIGEST: case CSP_MODE_CIPHER: case CSP_MODE_AEAD: case CSP_MODE_ETA: break; default: return (EINVAL); } error = aesni_cipher_setup(ses, csp); if (error != 0) { CRYPTDEB("setup failed"); return (error); } return (0); } static int aesni_process(device_t dev, struct cryptop *crp, int hint __unused) { struct aesni_session *ses; int error; ses = crypto_get_driver_session(crp->crp_session); error = aesni_cipher_process(ses, crp); crp->crp_etype = error; crypto_done(crp); return (0); } static uint8_t * aesni_cipher_alloc(struct cryptop *crp, int start, int length, bool *allocated) { uint8_t *addr; addr = crypto_contiguous_subsegment(crp, start, length); if (addr != NULL) { *allocated = false; return (addr); } addr = malloc(length, M_AESNI, M_NOWAIT); if (addr != NULL) { *allocated = true; crypto_copydata(crp, start, length, addr); } else *allocated = false; return (addr); } static device_method_t aesni_methods[] = { DEVMETHOD(device_identify, aesni_identify), DEVMETHOD(device_probe, aesni_probe), DEVMETHOD(device_attach, aesni_attach), DEVMETHOD(device_detach, aesni_detach), DEVMETHOD(cryptodev_probesession, aesni_probesession), DEVMETHOD(cryptodev_newsession, aesni_newsession), DEVMETHOD(cryptodev_process, aesni_process), DEVMETHOD_END }; static driver_t aesni_driver = { "aesni", aesni_methods, sizeof(struct aesni_softc), }; static devclass_t aesni_devclass; DRIVER_MODULE(aesni, nexus, aesni_driver, aesni_devclass, 0, 0); MODULE_VERSION(aesni, 1); MODULE_DEPEND(aesni, crypto, 1, 1, 1); static int intel_sha1_update(void *vctx, const void *vdata, u_int datalen) { struct sha1_ctxt *ctx = vctx; const char *data = vdata; size_t gaplen; size_t gapstart; size_t off; size_t copysiz; u_int blocks; off = 0; /* Do any aligned blocks without redundant copying. */ if (datalen >= 64 && ctx->count % 64 == 0) { blocks = datalen / 64; ctx->c.b64[0] += blocks * 64 * 8; intel_sha1_step(ctx->h.b32, data + off, blocks); off += blocks * 64; } while (off < datalen) { gapstart = ctx->count % 64; gaplen = 64 - gapstart; copysiz = (gaplen < datalen - off) ? gaplen : datalen - off; bcopy(&data[off], &ctx->m.b8[gapstart], copysiz); ctx->count += copysiz; ctx->count %= 64; ctx->c.b64[0] += copysiz * 8; if (ctx->count % 64 == 0) intel_sha1_step(ctx->h.b32, (void *)ctx->m.b8, 1); off += copysiz; } return (0); } static void SHA1_Init_fn(void *ctx) { sha1_init(ctx); } static void SHA1_Finalize_fn(void *digest, void *ctx) { sha1_result(ctx, digest); } static int intel_sha256_update(void *vctx, const void *vdata, u_int len) { SHA256_CTX *ctx = vctx; uint64_t bitlen; uint32_t r; u_int blocks; const unsigned char *src = vdata; /* Number of bytes left in the buffer from previous updates */ r = (ctx->count >> 3) & 0x3f; /* Convert the length into a number of bits */ bitlen = len << 3; /* Update number of bits */ ctx->count += bitlen; /* Handle the case where we don't need to perform any transforms */ if (len < 64 - r) { memcpy(&ctx->buf[r], src, len); return (0); } /* Finish the current block */ memcpy(&ctx->buf[r], src, 64 - r); intel_sha256_step(ctx->state, ctx->buf, 1); src += 64 - r; len -= 64 - r; /* Perform complete blocks */ if (len >= 64) { blocks = len / 64; intel_sha256_step(ctx->state, src, blocks); src += blocks * 64; len -= blocks * 64; } /* Copy left over data into buffer */ memcpy(ctx->buf, src, len); return (0); } static void SHA224_Init_fn(void *ctx) { SHA224_Init(ctx); } static void SHA224_Finalize_fn(void *digest, void *ctx) { SHA224_Final(digest, ctx); } static void SHA256_Init_fn(void *ctx) { SHA256_Init(ctx); } static void SHA256_Finalize_fn(void *digest, void *ctx) { SHA256_Final(digest, ctx); } static int aesni_authprepare(struct aesni_session *ses, int klen) { if (klen > SHA1_BLOCK_LEN) return (EINVAL); if ((ses->hmac && klen == 0) || (!ses->hmac && klen != 0)) return (EINVAL); return (0); } static int aesni_cipherprepare(const struct crypto_session_params *csp) { switch (csp->csp_cipher_alg) { case CRYPTO_AES_ICM: case CRYPTO_AES_NIST_GCM_16: case CRYPTO_AES_CCM_16: case CRYPTO_AES_CBC: switch (csp->csp_cipher_klen * 8) { case 128: case 192: case 256: break; default: CRYPTDEB("invalid CBC/ICM/GCM key length"); return (EINVAL); } break; case CRYPTO_AES_XTS: switch (csp->csp_cipher_klen * 8) { case 256: case 512: break; default: CRYPTDEB("invalid XTS key length"); return (EINVAL); } break; default: return (EINVAL); } return (0); } static int aesni_cipher_setup(struct aesni_session *ses, const struct crypto_session_params *csp) { struct fpu_kern_ctx *ctx; int kt, ctxidx, error; switch (csp->csp_auth_alg) { case CRYPTO_SHA1_HMAC: ses->hmac = true; /* FALLTHROUGH */ case CRYPTO_SHA1: ses->hash_len = SHA1_HASH_LEN; ses->hash_init = SHA1_Init_fn; ses->hash_update = intel_sha1_update; ses->hash_finalize = SHA1_Finalize_fn; break; case CRYPTO_SHA2_224_HMAC: ses->hmac = true; /* FALLTHROUGH */ case CRYPTO_SHA2_224: ses->hash_len = SHA2_224_HASH_LEN; ses->hash_init = SHA224_Init_fn; ses->hash_update = intel_sha256_update; ses->hash_finalize = SHA224_Finalize_fn; break; case CRYPTO_SHA2_256_HMAC: ses->hmac = true; /* FALLTHROUGH */ case CRYPTO_SHA2_256: ses->hash_len = SHA2_256_HASH_LEN; ses->hash_init = SHA256_Init_fn; ses->hash_update = intel_sha256_update; ses->hash_finalize = SHA256_Finalize_fn; break; } if (ses->hash_len != 0) { if (csp->csp_auth_mlen == 0) ses->mlen = ses->hash_len; else ses->mlen = csp->csp_auth_mlen; error = aesni_authprepare(ses, csp->csp_auth_klen); if (error != 0) return (error); } error = aesni_cipherprepare(csp); if (error != 0) return (error); kt = is_fpu_kern_thread(0) || (csp->csp_cipher_alg == 0); if (!kt) { ACQUIRE_CTX(ctxidx, ctx); fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR); } error = 0; if (csp->csp_cipher_key != NULL) aesni_cipher_setup_common(ses, csp, csp->csp_cipher_key, csp->csp_cipher_klen); if (!kt) { fpu_kern_leave(curthread, ctx); RELEASE_CTX(ctxidx, ctx); } return (error); } static int aesni_cipher_process(struct aesni_session *ses, struct cryptop *crp) { const struct crypto_session_params *csp; struct fpu_kern_ctx *ctx; int error, ctxidx; bool kt; csp = crypto_get_params(crp->crp_session); switch (csp->csp_cipher_alg) { case CRYPTO_AES_ICM: case CRYPTO_AES_NIST_GCM_16: case CRYPTO_AES_CCM_16: if ((crp->crp_flags & CRYPTO_F_IV_SEPARATE) == 0) return (EINVAL); break; case CRYPTO_AES_CBC: case CRYPTO_AES_XTS: /* CBC & XTS can only handle full blocks for now */ if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) return (EINVAL); break; } ctx = NULL; ctxidx = 0; error = 0; kt = is_fpu_kern_thread(0); if (!kt) { ACQUIRE_CTX(ctxidx, ctx); fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR); } /* Do work */ if (csp->csp_mode == CSP_MODE_ETA) { if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) { error = aesni_cipher_crypt(ses, crp, csp); if (error == 0) error = aesni_cipher_mac(ses, crp, csp); } else { error = aesni_cipher_mac(ses, crp, csp); if (error == 0) error = aesni_cipher_crypt(ses, crp, csp); } } else if (csp->csp_mode == CSP_MODE_DIGEST) error = aesni_cipher_mac(ses, crp, csp); else error = aesni_cipher_crypt(ses, crp, csp); if (!kt) { fpu_kern_leave(curthread, ctx); RELEASE_CTX(ctxidx, ctx); } return (error); } static int aesni_cipher_crypt(struct aesni_session *ses, struct cryptop *crp, const struct crypto_session_params *csp) { uint8_t iv[AES_BLOCK_LEN], tag[GMAC_DIGEST_LEN]; uint8_t *authbuf, *buf, *outbuf; int error; bool encflag, allocated, authallocated, outallocated, outcopy; buf = aesni_cipher_alloc(crp, crp->crp_payload_start, crp->crp_payload_length, &allocated); if (buf == NULL) return (ENOMEM); outallocated = false; authallocated = false; authbuf = NULL; if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16 || csp->csp_cipher_alg == CRYPTO_AES_CCM_16) { if (crp->crp_aad != NULL) authbuf = crp->crp_aad; else authbuf = aesni_cipher_alloc(crp, crp->crp_aad_start, crp->crp_aad_length, &authallocated); if (authbuf == NULL) { error = ENOMEM; goto out; } } if (CRYPTO_HAS_OUTPUT_BUFFER(crp)) { outbuf = crypto_buffer_contiguous_subsegment(&crp->crp_obuf, crp->crp_payload_output_start, crp->crp_payload_length); if (outbuf == NULL) { outcopy = true; if (allocated) outbuf = buf; else { outbuf = malloc(crp->crp_payload_length, M_AESNI, M_NOWAIT); if (outbuf == NULL) { error = ENOMEM; goto out; } outallocated = true; } } else outcopy = false; } else { outbuf = buf; outcopy = allocated; } error = 0; encflag = CRYPTO_OP_IS_ENCRYPT(crp->crp_op); if (crp->crp_cipher_key != NULL) aesni_cipher_setup_common(ses, csp, crp->crp_cipher_key, csp->csp_cipher_klen); crypto_read_iv(crp, iv); switch (csp->csp_cipher_alg) { case CRYPTO_AES_CBC: if (encflag) aesni_encrypt_cbc(ses->rounds, ses->enc_schedule, crp->crp_payload_length, buf, outbuf, iv); else { if (buf != outbuf) memcpy(outbuf, buf, crp->crp_payload_length); aesni_decrypt_cbc(ses->rounds, ses->dec_schedule, crp->crp_payload_length, outbuf, iv); } break; case CRYPTO_AES_ICM: /* encryption & decryption are the same */ aesni_encrypt_icm(ses->rounds, ses->enc_schedule, crp->crp_payload_length, buf, outbuf, iv); break; case CRYPTO_AES_XTS: if (encflag) aesni_encrypt_xts(ses->rounds, ses->enc_schedule, ses->xts_schedule, crp->crp_payload_length, buf, outbuf, iv); else aesni_decrypt_xts(ses->rounds, ses->dec_schedule, ses->xts_schedule, crp->crp_payload_length, buf, outbuf, iv); break; case CRYPTO_AES_NIST_GCM_16: if (encflag) { memset(tag, 0, sizeof(tag)); AES_GCM_encrypt(buf, outbuf, authbuf, iv, tag, crp->crp_payload_length, crp->crp_aad_length, csp->csp_ivlen, ses->enc_schedule, ses->rounds); crypto_copyback(crp, crp->crp_digest_start, sizeof(tag), tag); } else { crypto_copydata(crp, crp->crp_digest_start, sizeof(tag), tag); if (!AES_GCM_decrypt(buf, outbuf, authbuf, iv, tag, crp->crp_payload_length, crp->crp_aad_length, csp->csp_ivlen, ses->enc_schedule, ses->rounds)) error = EBADMSG; } break; case CRYPTO_AES_CCM_16: if (encflag) { memset(tag, 0, sizeof(tag)); AES_CCM_encrypt(buf, outbuf, authbuf, iv, tag, crp->crp_payload_length, crp->crp_aad_length, csp->csp_ivlen, ses->enc_schedule, ses->rounds); crypto_copyback(crp, crp->crp_digest_start, sizeof(tag), tag); } else { crypto_copydata(crp, crp->crp_digest_start, sizeof(tag), tag); if (!AES_CCM_decrypt(buf, outbuf, authbuf, iv, tag, crp->crp_payload_length, crp->crp_aad_length, csp->csp_ivlen, ses->enc_schedule, ses->rounds)) error = EBADMSG; } break; } if (outcopy && error == 0) crypto_copyback(crp, CRYPTO_HAS_OUTPUT_BUFFER(crp) ? crp->crp_payload_output_start : crp->crp_payload_start, crp->crp_payload_length, outbuf); out: - if (allocated) { - explicit_bzero(buf, crp->crp_payload_length); - free(buf, M_AESNI); - } - if (authallocated) { - explicit_bzero(authbuf, crp->crp_aad_length); - free(authbuf, M_AESNI); - } - if (outallocated) { - explicit_bzero(outbuf, crp->crp_payload_length); - free(outbuf, M_AESNI); - } + if (allocated) + zfree(buf, M_AESNI); + if (authallocated) + zfree(authbuf, M_AESNI); + if (outallocated) + zfree(outbuf, M_AESNI); explicit_bzero(iv, sizeof(iv)); explicit_bzero(tag, sizeof(tag)); return (error); } static int aesni_cipher_mac(struct aesni_session *ses, struct cryptop *crp, const struct crypto_session_params *csp) { union { struct SHA256Context sha2 __aligned(16); struct sha1_ctxt sha1 __aligned(16); } sctx; uint32_t res[SHA2_256_HASH_LEN / sizeof(uint32_t)]; const uint8_t *key; int i, keylen; if (crp->crp_auth_key != NULL) key = crp->crp_auth_key; else key = csp->csp_auth_key; keylen = csp->csp_auth_klen; if (ses->hmac) { uint8_t hmac_key[SHA1_BLOCK_LEN] __aligned(16); /* Inner hash: (K ^ IPAD) || data */ ses->hash_init(&sctx); for (i = 0; i < keylen; i++) hmac_key[i] = key[i] ^ HMAC_IPAD_VAL; for (i = keylen; i < sizeof(hmac_key); i++) hmac_key[i] = 0 ^ HMAC_IPAD_VAL; ses->hash_update(&sctx, hmac_key, sizeof(hmac_key)); if (crp->crp_aad != NULL) ses->hash_update(&sctx, crp->crp_aad, crp->crp_aad_length); else crypto_apply(crp, crp->crp_aad_start, crp->crp_aad_length, ses->hash_update, &sctx); if (CRYPTO_HAS_OUTPUT_BUFFER(crp) && CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) crypto_apply_buf(&crp->crp_obuf, crp->crp_payload_output_start, crp->crp_payload_length, ses->hash_update, &sctx); else crypto_apply(crp, crp->crp_payload_start, crp->crp_payload_length, ses->hash_update, &sctx); ses->hash_finalize(res, &sctx); /* Outer hash: (K ^ OPAD) || inner hash */ ses->hash_init(&sctx); for (i = 0; i < keylen; i++) hmac_key[i] = key[i] ^ HMAC_OPAD_VAL; for (i = keylen; i < sizeof(hmac_key); i++) hmac_key[i] = 0 ^ HMAC_OPAD_VAL; ses->hash_update(&sctx, hmac_key, sizeof(hmac_key)); ses->hash_update(&sctx, res, ses->hash_len); ses->hash_finalize(res, &sctx); explicit_bzero(hmac_key, sizeof(hmac_key)); } else { ses->hash_init(&sctx); if (crp->crp_aad != NULL) ses->hash_update(&sctx, crp->crp_aad, crp->crp_aad_length); else crypto_apply(crp, crp->crp_aad_start, crp->crp_aad_length, ses->hash_update, &sctx); if (CRYPTO_HAS_OUTPUT_BUFFER(crp) && CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) crypto_apply_buf(&crp->crp_obuf, crp->crp_payload_output_start, crp->crp_payload_length, ses->hash_update, &sctx); else crypto_apply(crp, crp->crp_payload_start, crp->crp_payload_length, ses->hash_update, &sctx); ses->hash_finalize(res, &sctx); } if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) { uint32_t res2[SHA2_256_HASH_LEN / sizeof(uint32_t)]; crypto_copydata(crp, crp->crp_digest_start, ses->mlen, res2); if (timingsafe_bcmp(res, res2, ses->mlen) != 0) return (EBADMSG); explicit_bzero(res2, sizeof(res2)); } else crypto_copyback(crp, crp->crp_digest_start, ses->mlen, res); explicit_bzero(res, sizeof(res)); return (0); } Index: head/sys/crypto/via/padlock_cipher.c =================================================================== --- head/sys/crypto/via/padlock_cipher.c (revision 362623) +++ head/sys/crypto/via/padlock_cipher.c (revision 362624) @@ -1,241 +1,240 @@ /*- * Copyright (c) 2005-2006 Pawel Jakub Dawidek * Copyright (c) 2004 Mark R V Murray * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $OpenBSD: via.c,v 1.3 2004/06/15 23:36:55 deraadt Exp $ */ /*- * Copyright (c) 2003 Jason Wright * Copyright (c) 2003, 2004 Theo de Raadt * All rights reserved. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #define PADLOCK_ROUND_COUNT_AES128 10 #define PADLOCK_ROUND_COUNT_AES192 12 #define PADLOCK_ROUND_COUNT_AES256 14 #define PADLOCK_ALGORITHM_TYPE_AES 0 #define PADLOCK_KEY_GENERATION_HW 0 #define PADLOCK_KEY_GENERATION_SW 1 #define PADLOCK_DIRECTION_ENCRYPT 0 #define PADLOCK_DIRECTION_DECRYPT 1 #define PADLOCK_KEY_SIZE_128 0 #define PADLOCK_KEY_SIZE_192 1 #define PADLOCK_KEY_SIZE_256 2 MALLOC_DECLARE(M_PADLOCK); static __inline void padlock_cbc(void *in, void *out, size_t count, void *key, union padlock_cw *cw, void *iv) { #ifdef __GNUCLIKE_ASM /* The .byte line is really VIA C3 "xcrypt-cbc" instruction */ __asm __volatile( "pushf \n\t" "popf \n\t" "rep \n\t" ".byte 0x0f, 0xa7, 0xd0" : "+a" (iv), "+c" (count), "+D" (out), "+S" (in) : "b" (key), "d" (cw) : "cc", "memory" ); #endif } static void padlock_cipher_key_setup(struct padlock_session *ses, const void *key, int klen) { union padlock_cw *cw; int i; cw = &ses->ses_cw; if (cw->cw_key_generation == PADLOCK_KEY_GENERATION_SW) { /* Build expanded keys for both directions */ rijndaelKeySetupEnc(ses->ses_ekey, key, klen * 8); rijndaelKeySetupDec(ses->ses_dkey, key, klen * 8); for (i = 0; i < 4 * (RIJNDAEL_MAXNR + 1); i++) { ses->ses_ekey[i] = ntohl(ses->ses_ekey[i]); ses->ses_dkey[i] = ntohl(ses->ses_dkey[i]); } } else { bcopy(key, ses->ses_ekey, klen); bcopy(key, ses->ses_dkey, klen); } } int padlock_cipher_setup(struct padlock_session *ses, const struct crypto_session_params *csp) { union padlock_cw *cw; if (csp->csp_cipher_klen != 16 && csp->csp_cipher_klen != 25 && csp->csp_cipher_klen != 32) { return (EINVAL); } cw = &ses->ses_cw; bzero(cw, sizeof(*cw)); cw->cw_algorithm_type = PADLOCK_ALGORITHM_TYPE_AES; cw->cw_key_generation = PADLOCK_KEY_GENERATION_SW; cw->cw_intermediate = 0; switch (csp->csp_cipher_klen * 8) { case 128: cw->cw_round_count = PADLOCK_ROUND_COUNT_AES128; cw->cw_key_size = PADLOCK_KEY_SIZE_128; #ifdef HW_KEY_GENERATION /* This doesn't buy us much, that's why it is commented out. */ cw->cw_key_generation = PADLOCK_KEY_GENERATION_HW; #endif break; case 192: cw->cw_round_count = PADLOCK_ROUND_COUNT_AES192; cw->cw_key_size = PADLOCK_KEY_SIZE_192; break; case 256: cw->cw_round_count = PADLOCK_ROUND_COUNT_AES256; cw->cw_key_size = PADLOCK_KEY_SIZE_256; break; } if (csp->csp_cipher_key != NULL) { padlock_cipher_key_setup(ses, csp->csp_cipher_key, csp->csp_cipher_klen); } return (0); } /* * Function checks if the given buffer is already 16 bytes aligned. * If it is there is no need to allocate new buffer. * If it isn't, new buffer is allocated. */ static u_char * padlock_cipher_alloc(struct cryptop *crp, int *allocated) { u_char *addr; addr = crypto_contiguous_subsegment(crp, crp->crp_payload_start, crp->crp_payload_length); if (((uintptr_t)addr & 0xf) == 0) { /* 16 bytes aligned? */ *allocated = 0; return (addr); } *allocated = 1; addr = malloc(crp->crp_payload_length + 16, M_PADLOCK, M_NOWAIT); return (addr); } int padlock_cipher_process(struct padlock_session *ses, struct cryptop *crp, const struct crypto_session_params *csp) { union padlock_cw *cw; struct thread *td; u_char *buf, *abuf; uint32_t *key; uint8_t iv[AES_BLOCK_LEN] __aligned(16); int allocated; buf = padlock_cipher_alloc(crp, &allocated); if (buf == NULL) return (ENOMEM); /* Buffer has to be 16 bytes aligned. */ abuf = PADLOCK_ALIGN(buf); if (crp->crp_cipher_key != NULL) { padlock_cipher_key_setup(ses, crp->crp_cipher_key, csp->csp_cipher_klen); } cw = &ses->ses_cw; cw->cw_filler0 = 0; cw->cw_filler1 = 0; cw->cw_filler2 = 0; cw->cw_filler3 = 0; crypto_read_iv(crp, iv); if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) { cw->cw_direction = PADLOCK_DIRECTION_ENCRYPT; key = ses->ses_ekey; } else { cw->cw_direction = PADLOCK_DIRECTION_DECRYPT; key = ses->ses_dkey; } if (allocated) { crypto_copydata(crp, crp->crp_payload_start, crp->crp_payload_length, abuf); } td = curthread; fpu_kern_enter(td, ses->ses_fpu_ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR); padlock_cbc(abuf, abuf, crp->crp_payload_length / AES_BLOCK_LEN, key, cw, iv); fpu_kern_leave(td, ses->ses_fpu_ctx); if (allocated) { crypto_copyback(crp, crp->crp_payload_start, crp->crp_payload_length, abuf); - explicit_bzero(buf, crp->crp_payload_length + 16); - free(buf, M_PADLOCK); + zfree(buf, M_PADLOCK); } return (0); } Index: head/sys/dev/cxgbe/crypto/t4_kern_tls.c =================================================================== --- head/sys/dev/cxgbe/crypto/t4_kern_tls.c (revision 362623) +++ head/sys/dev/cxgbe/crypto/t4_kern_tls.c (revision 362624) @@ -1,2398 +1,2397 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018-2019 Chelsio Communications, Inc. * All rights reserved. * Written by: John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_kern_tls.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common/common.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "common/t4_tcb.h" #include "t4_l2t.h" #include "t4_clip.h" #include "t4_mp_ring.h" #include "crypto/t4_crypto.h" #if defined(INET) || defined(INET6) #define SALT_SIZE 4 #define GCM_TAG_SIZE 16 #define TLS_HEADER_LENGTH 5 #define TLS_KEY_CONTEXT_SZ roundup2(sizeof(struct tls_keyctx), 32) struct tls_scmd { __be32 seqno_numivs; __be32 ivgen_hdrlen; }; struct tls_key_req { /* FW_ULPTX_WR */ __be32 wr_hi; __be32 wr_mid; __be32 ftid; __u8 reneg_to_write_rx; __u8 protocol; __be16 mfs; /* master command */ __be32 cmd; __be32 len16; /* command length */ __be32 dlen; /* data length in 32-byte units */ __be32 kaddr; /* sub-command */ __be32 sc_more; __be32 sc_len; }__packed; struct tls_keyctx { struct tx_keyctx_hdr { __u8 ctxlen; __u8 r2; __be16 dualck_to_txvalid; __u8 txsalt[4]; __be64 r5; } txhdr; struct keys { __u8 edkey[32]; __u8 ipad[64]; __u8 opad[64]; } keys; }; #define S_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT 11 #define M_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT 0x1 #define V_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(x) \ ((x) << S_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT) #define G_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT) & \ M_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT) #define F_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT \ V_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(1U) #define S_TLS_KEYCTX_TX_WR_SALT_PRESENT 10 #define M_TLS_KEYCTX_TX_WR_SALT_PRESENT 0x1 #define V_TLS_KEYCTX_TX_WR_SALT_PRESENT(x) \ ((x) << S_TLS_KEYCTX_TX_WR_SALT_PRESENT) #define G_TLS_KEYCTX_TX_WR_SALT_PRESENT(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_SALT_PRESENT) & \ M_TLS_KEYCTX_TX_WR_SALT_PRESENT) #define F_TLS_KEYCTX_TX_WR_SALT_PRESENT \ V_TLS_KEYCTX_TX_WR_SALT_PRESENT(1U) #define S_TLS_KEYCTX_TX_WR_TXCK_SIZE 6 #define M_TLS_KEYCTX_TX_WR_TXCK_SIZE 0xf #define V_TLS_KEYCTX_TX_WR_TXCK_SIZE(x) \ ((x) << S_TLS_KEYCTX_TX_WR_TXCK_SIZE) #define G_TLS_KEYCTX_TX_WR_TXCK_SIZE(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_TXCK_SIZE) & \ M_TLS_KEYCTX_TX_WR_TXCK_SIZE) #define S_TLS_KEYCTX_TX_WR_TXMK_SIZE 2 #define M_TLS_KEYCTX_TX_WR_TXMK_SIZE 0xf #define V_TLS_KEYCTX_TX_WR_TXMK_SIZE(x) \ ((x) << S_TLS_KEYCTX_TX_WR_TXMK_SIZE) #define G_TLS_KEYCTX_TX_WR_TXMK_SIZE(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_TXMK_SIZE) & \ M_TLS_KEYCTX_TX_WR_TXMK_SIZE) #define S_TLS_KEYCTX_TX_WR_TXVALID 0 #define M_TLS_KEYCTX_TX_WR_TXVALID 0x1 #define V_TLS_KEYCTX_TX_WR_TXVALID(x) \ ((x) << S_TLS_KEYCTX_TX_WR_TXVALID) #define G_TLS_KEYCTX_TX_WR_TXVALID(x) \ (((x) >> S_TLS_KEYCTX_TX_WR_TXVALID) & M_TLS_KEYCTX_TX_WR_TXVALID) #define F_TLS_KEYCTX_TX_WR_TXVALID V_TLS_KEYCTX_TX_WR_TXVALID(1U) /* Key Context Programming Operation type */ #define KEY_WRITE_RX 0x1 #define KEY_WRITE_TX 0x2 #define KEY_DELETE_RX 0x4 #define KEY_DELETE_TX 0x8 struct tlspcb { struct cxgbe_snd_tag com; struct vi_info *vi; /* virtual interface */ struct adapter *sc; struct l2t_entry *l2te; /* L2 table entry used by this connection */ int tid; /* Connection identifier */ int tx_key_addr; bool inline_key; bool using_timestamps; unsigned char enc_mode; struct tls_scmd scmd0; struct tls_scmd scmd0_short; unsigned int tx_key_info_size; uint32_t prev_seq; uint32_t prev_ack; uint32_t prev_tsecr; uint16_t prev_win; uint16_t prev_mss; /* Only used outside of setup and teardown when using inline keys. */ struct tls_keyctx keyctx; /* Fields only used during setup and teardown. */ struct inpcb *inp; /* backpointer to host stack's PCB */ struct sge_txq *txq; struct sge_wrq *ctrlq; struct clip_entry *ce; /* CLIP table entry used by this tid */ unsigned char auth_mode; unsigned char hmac_ctrl; unsigned char mac_first; unsigned char iv_size; unsigned int frag_size; unsigned int cipher_secret_size; int proto_ver; bool open_pending; }; static int ktls_setup_keys(struct tlspcb *tlsp, const struct ktls_session *tls, struct sge_txq *txq); static inline struct tlspcb * mst_to_tls(struct m_snd_tag *t) { return ((struct tlspcb *)mst_to_cst(t)); } /* XXX: There are similar versions of these two in tom/t4_tls.c. */ static int get_new_keyid(struct tlspcb *tlsp) { vmem_addr_t addr; if (vmem_alloc(tlsp->sc->key_map, TLS_KEY_CONTEXT_SZ, M_NOWAIT | M_FIRSTFIT, &addr) != 0) return (-1); return (addr); } static void free_keyid(struct tlspcb *tlsp, int keyid) { CTR3(KTR_CXGBE, "%s: tid %d key addr %#x", __func__, tlsp->tid, keyid); vmem_free(tlsp->sc->key_map, keyid, TLS_KEY_CONTEXT_SZ); } static struct tlspcb * alloc_tlspcb(struct ifnet *ifp, struct vi_info *vi, int flags) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct tlspcb *tlsp; tlsp = malloc(sizeof(*tlsp), M_CXGBE, M_ZERO | flags); if (tlsp == NULL) return (NULL); cxgbe_snd_tag_init(&tlsp->com, ifp, IF_SND_TAG_TYPE_TLS); tlsp->vi = vi; tlsp->sc = sc; tlsp->ctrlq = &sc->sge.ctrlq[pi->port_id]; tlsp->tid = -1; tlsp->tx_key_addr = -1; return (tlsp); } static void init_ktls_key_params(struct tlspcb *tlsp, const struct ktls_session *tls) { int mac_key_size; if (tls->params.tls_vminor == TLS_MINOR_VER_ONE) tlsp->proto_ver = SCMD_PROTO_VERSION_TLS_1_1; else tlsp->proto_ver = SCMD_PROTO_VERSION_TLS_1_2; tlsp->cipher_secret_size = tls->params.cipher_key_len; tlsp->tx_key_info_size = sizeof(struct tx_keyctx_hdr) + tlsp->cipher_secret_size; if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16) { tlsp->auth_mode = SCMD_AUTH_MODE_GHASH; tlsp->enc_mode = SCMD_CIPH_MODE_AES_GCM; tlsp->iv_size = 4; tlsp->mac_first = 0; tlsp->hmac_ctrl = SCMD_HMAC_CTRL_NOP; tlsp->tx_key_info_size += GMAC_BLOCK_LEN; } else { switch (tls->params.auth_algorithm) { case CRYPTO_SHA1_HMAC: mac_key_size = roundup2(SHA1_HASH_LEN, 16); tlsp->auth_mode = SCMD_AUTH_MODE_SHA1; break; case CRYPTO_SHA2_256_HMAC: mac_key_size = SHA2_256_HASH_LEN; tlsp->auth_mode = SCMD_AUTH_MODE_SHA256; break; case CRYPTO_SHA2_384_HMAC: mac_key_size = SHA2_512_HASH_LEN; tlsp->auth_mode = SCMD_AUTH_MODE_SHA512_384; break; } tlsp->enc_mode = SCMD_CIPH_MODE_AES_CBC; tlsp->iv_size = 8; /* for CBC, iv is 16B, unit of 2B */ tlsp->mac_first = 1; tlsp->hmac_ctrl = SCMD_HMAC_CTRL_NO_TRUNC; tlsp->tx_key_info_size += mac_key_size * 2; } tlsp->frag_size = tls->params.max_frame_len; } static int ktls_act_open_cpl_size(bool isipv6) { if (isipv6) return (sizeof(struct cpl_t6_act_open_req6)); else return (sizeof(struct cpl_t6_act_open_req)); } static void mk_ktls_act_open_req(struct adapter *sc, struct vi_info *vi, struct inpcb *inp, struct tlspcb *tlsp, int atid, void *dst) { struct tcpcb *tp = intotcpcb(inp); struct cpl_t6_act_open_req *cpl6; struct cpl_act_open_req *cpl; uint64_t options; int qid_atid; cpl6 = dst; cpl = (struct cpl_act_open_req *)cpl6; INIT_TP_WR(cpl6, 0); qid_atid = V_TID_QID(sc->sge.fwq.abs_id) | V_TID_TID(atid) | V_TID_COOKIE(CPL_COOKIE_KERN_TLS); OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid)); inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip, &cpl->peer_port); options = F_TCAM_BYPASS | V_ULP_MODE(ULP_MODE_NONE); options |= V_SMAC_SEL(vi->smt_idx) | V_TX_CHAN(vi->pi->tx_chan); options |= F_NON_OFFLOAD; cpl->opt0 = htobe64(options); options = V_TX_QUEUE(sc->params.tp.tx_modq[vi->pi->tx_chan]); if (tp->t_flags & TF_REQ_TSTMP) options |= F_TSTAMPS_EN; cpl->opt2 = htobe32(options); } static void mk_ktls_act_open_req6(struct adapter *sc, struct vi_info *vi, struct inpcb *inp, struct tlspcb *tlsp, int atid, void *dst) { struct tcpcb *tp = intotcpcb(inp); struct cpl_t6_act_open_req6 *cpl6; struct cpl_act_open_req6 *cpl; uint64_t options; int qid_atid; cpl6 = dst; cpl = (struct cpl_act_open_req6 *)cpl6; INIT_TP_WR(cpl6, 0); qid_atid = V_TID_QID(sc->sge.fwq.abs_id) | V_TID_TID(atid) | V_TID_COOKIE(CPL_COOKIE_KERN_TLS); OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, qid_atid)); cpl->local_port = inp->inp_lport; cpl->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; cpl->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8]; cpl->peer_port = inp->inp_fport; cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0]; cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8]; options = F_TCAM_BYPASS | V_ULP_MODE(ULP_MODE_NONE); options |= V_SMAC_SEL(vi->smt_idx) | V_TX_CHAN(vi->pi->tx_chan); options |= F_NON_OFFLOAD; cpl->opt0 = htobe64(options); options = V_TX_QUEUE(sc->params.tp.tx_modq[vi->pi->tx_chan]); if (tp->t_flags & TF_REQ_TSTMP) options |= F_TSTAMPS_EN; cpl->opt2 = htobe32(options); } static int send_ktls_act_open_req(struct adapter *sc, struct vi_info *vi, struct inpcb *inp, struct tlspcb *tlsp, int atid) { struct wrqe *wr; bool isipv6; isipv6 = (inp->inp_vflag & INP_IPV6) != 0; if (isipv6) { tlsp->ce = t4_hold_lip(sc, &inp->in6p_laddr, NULL); if (tlsp->ce == NULL) return (ENOENT); } wr = alloc_wrqe(ktls_act_open_cpl_size(isipv6), tlsp->ctrlq); if (wr == NULL) { CTR2(KTR_CXGBE, "%s: atid %d failed to alloc WR", __func__, atid); return (ENOMEM); } if (isipv6) mk_ktls_act_open_req6(sc, vi, inp, tlsp, atid, wrtod(wr)); else mk_ktls_act_open_req(sc, vi, inp, tlsp, atid, wrtod(wr)); tlsp->open_pending = true; t4_wrq_tx(sc, wr); return (0); } static int ktls_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); u_int atid = G_TID_TID(G_AOPEN_ATID(be32toh(cpl->atid_status))); u_int status = G_AOPEN_STATUS(be32toh(cpl->atid_status)); struct tlspcb *tlsp = lookup_atid(sc, atid); struct inpcb *inp = tlsp->inp; CTR3(KTR_CXGBE, "%s: atid %d status %d", __func__, atid, status); free_atid(sc, atid); if (status == 0) tlsp->tid = GET_TID(cpl); INP_WLOCK(inp); tlsp->open_pending = false; wakeup(tlsp); INP_WUNLOCK(inp); return (0); } /* SET_TCB_FIELD sent as a ULP command looks like this */ #define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \ sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core)) _Static_assert((LEN__SET_TCB_FIELD_ULP + sizeof(struct ulptx_idata)) % 16 == 0, "CPL_SET_TCB_FIELD ULP command not 16-byte aligned"); static void write_set_tcb_field_ulp(struct tlspcb *tlsp, void *dst, struct sge_txq *txq, uint16_t word, uint64_t mask, uint64_t val) { struct ulp_txpkt *txpkt; struct ulptx_idata *idata; struct cpl_set_tcb_field_core *cpl; /* ULP_TXPKT */ txpkt = dst; txpkt->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DATAMODIFY(0) | V_ULP_TXPKT_CHANNELID(tlsp->vi->pi->port_id) | V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(txq->eq.cntxt_id) | V_ULP_TXPKT_RO(1)); txpkt->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16)); /* ULPTX_IDATA sub-command */ idata = (struct ulptx_idata *)(txpkt + 1); idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); idata->len = htobe32(sizeof(*cpl)); /* CPL_SET_TCB_FIELD */ cpl = (struct cpl_set_tcb_field_core *)(idata + 1); OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tlsp->tid)); cpl->reply_ctrl = htobe16(F_NO_REPLY); cpl->word_cookie = htobe16(V_WORD(word)); cpl->mask = htobe64(mask); cpl->val = htobe64(val); /* ULPTX_NOOP */ idata = (struct ulptx_idata *)(cpl + 1); idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); idata->len = htobe32(0); } static int ktls_set_tcb_fields(struct tlspcb *tlsp, struct tcpcb *tp, struct sge_txq *txq) { struct fw_ulptx_wr *wr; struct mbuf *m; char *dst; void *items[1]; int error, len; len = sizeof(*wr) + 3 * roundup2(LEN__SET_TCB_FIELD_ULP, 16); if (tp->t_flags & TF_REQ_TSTMP) len += roundup2(LEN__SET_TCB_FIELD_ULP, 16); m = alloc_wr_mbuf(len, M_NOWAIT); if (m == NULL) { CTR2(KTR_CXGBE, "%s: tid %d failed to alloc WR mbuf", __func__, tlsp->tid); return (ENOMEM); } m->m_pkthdr.snd_tag = m_snd_tag_ref(&tlsp->com.com); m->m_pkthdr.csum_flags |= CSUM_SND_TAG; /* FW_ULPTX_WR */ wr = mtod(m, void *); wr->op_to_compl = htobe32(V_FW_WR_OP(FW_ULPTX_WR)); wr->flowid_len16 = htobe32(F_FW_ULPTX_WR_DATA | V_FW_WR_LEN16(len / 16)); wr->cookie = 0; dst = (char *)(wr + 1); /* Clear TF_NON_OFFLOAD and set TF_CORE_BYPASS */ write_set_tcb_field_ulp(tlsp, dst, txq, W_TCB_T_FLAGS, V_TCB_T_FLAGS(V_TF_CORE_BYPASS(1) | V_TF_NON_OFFLOAD(1)), V_TCB_T_FLAGS(V_TF_CORE_BYPASS(1))); dst += roundup2(LEN__SET_TCB_FIELD_ULP, 16); /* Clear the SND_UNA_RAW, SND_NXT_RAW, and SND_MAX_RAW offsets. */ write_set_tcb_field_ulp(tlsp, dst, txq, W_TCB_SND_UNA_RAW, V_TCB_SND_NXT_RAW(M_TCB_SND_NXT_RAW) | V_TCB_SND_UNA_RAW(M_TCB_SND_UNA_RAW), V_TCB_SND_NXT_RAW(0) | V_TCB_SND_UNA_RAW(0)); dst += roundup2(LEN__SET_TCB_FIELD_ULP, 16); write_set_tcb_field_ulp(tlsp, dst, txq, W_TCB_SND_MAX_RAW, V_TCB_SND_MAX_RAW(M_TCB_SND_MAX_RAW), V_TCB_SND_MAX_RAW(0)); dst += roundup2(LEN__SET_TCB_FIELD_ULP, 16); if (tp->t_flags & TF_REQ_TSTMP) { write_set_tcb_field_ulp(tlsp, dst, txq, W_TCB_TIMESTAMP_OFFSET, V_TCB_TIMESTAMP_OFFSET(M_TCB_TIMESTAMP_OFFSET), V_TCB_TIMESTAMP_OFFSET(tp->ts_offset >> 28)); dst += roundup2(LEN__SET_TCB_FIELD_ULP, 16); } KASSERT(dst - (char *)wr == len, ("%s: length mismatch", __func__)); items[0] = m; error = mp_ring_enqueue(txq->r, items, 1, 1); if (error) m_free(m); return (error); } int cxgbe_tls_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **pt) { const struct ktls_session *tls; struct tlspcb *tlsp; struct adapter *sc; struct vi_info *vi; struct inpcb *inp; struct tcpcb *tp; struct sge_txq *txq; int atid, error, keyid; tls = params->tls.tls; /* Only TLS 1.1 and TLS 1.2 are currently supported. */ if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE || tls->params.tls_vminor < TLS_MINOR_VER_ONE || tls->params.tls_vminor > TLS_MINOR_VER_TWO) return (EPROTONOSUPPORT); /* Sanity check values in *tls. */ switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: /* XXX: Explicitly ignore any provided IV. */ switch (tls->params.cipher_key_len) { case 128 / 8: case 192 / 8: case 256 / 8: break; default: return (EINVAL); } switch (tls->params.auth_algorithm) { case CRYPTO_SHA1_HMAC: case CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384_HMAC: break; default: return (EPROTONOSUPPORT); } break; case CRYPTO_AES_NIST_GCM_16: if (tls->params.iv_len != SALT_SIZE) return (EINVAL); switch (tls->params.cipher_key_len) { case 128 / 8: case 192 / 8: case 256 / 8: break; default: return (EINVAL); } break; default: return (EPROTONOSUPPORT); } vi = ifp->if_softc; sc = vi->adapter; tlsp = alloc_tlspcb(ifp, vi, M_WAITOK); atid = alloc_atid(sc, tlsp); if (atid < 0) { error = ENOMEM; goto failed; } if (sc->tlst.inline_keys) keyid = -1; else keyid = get_new_keyid(tlsp); if (keyid < 0) { CTR2(KTR_CXGBE, "%s: atid %d using immediate key ctx", __func__, atid); tlsp->inline_key = true; } else { tlsp->tx_key_addr = keyid; CTR3(KTR_CXGBE, "%s: atid %d allocated TX key addr %#x", __func__, atid, tlsp->tx_key_addr); } inp = params->tls.inp; INP_RLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { INP_RUNLOCK(inp); error = ECONNRESET; goto failed; } tlsp->inp = inp; tp = inp->inp_ppcb; if (tp->t_flags & TF_REQ_TSTMP) { tlsp->using_timestamps = true; if ((tp->ts_offset & 0xfffffff) != 0) { INP_RUNLOCK(inp); error = EINVAL; goto failed; } } else tlsp->using_timestamps = false; error = send_ktls_act_open_req(sc, vi, inp, tlsp, atid); if (error) { INP_RUNLOCK(inp); goto failed; } /* Wait for reply to active open. */ CTR2(KTR_CXGBE, "%s: atid %d sent CPL_ACT_OPEN_REQ", __func__, atid); while (tlsp->open_pending) { /* * XXX: PCATCH? We would then have to discard the PCB * when the completion CPL arrived. */ error = rw_sleep(tlsp, &inp->inp_lock, 0, "t6tlsop", 0); } atid = -1; if (tlsp->tid < 0) { INP_RUNLOCK(inp); error = ENOMEM; goto failed; } if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { INP_RUNLOCK(inp); error = ECONNRESET; goto failed; } txq = &sc->sge.txq[vi->first_txq]; if (inp->inp_flowtype != M_HASHTYPE_NONE) txq += ((inp->inp_flowid % (vi->ntxq - vi->rsrv_noflowq)) + vi->rsrv_noflowq); tlsp->txq = txq; error = ktls_set_tcb_fields(tlsp, tp, txq); INP_RUNLOCK(inp); if (error) goto failed; init_ktls_key_params(tlsp, tls); error = ktls_setup_keys(tlsp, tls, txq); if (error) goto failed; /* The SCMD fields used when encrypting a full TLS record. */ tlsp->scmd0.seqno_numivs = htobe32(V_SCMD_SEQ_NO_CTRL(3) | V_SCMD_PROTO_VERSION(tlsp->proto_ver) | V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) | V_SCMD_CIPH_AUTH_SEQ_CTRL((tlsp->mac_first == 0)) | V_SCMD_CIPH_MODE(tlsp->enc_mode) | V_SCMD_AUTH_MODE(tlsp->auth_mode) | V_SCMD_HMAC_CTRL(tlsp->hmac_ctrl) | V_SCMD_IV_SIZE(tlsp->iv_size) | V_SCMD_NUM_IVS(1)); tlsp->scmd0.ivgen_hdrlen = V_SCMD_IV_GEN_CTRL(0) | V_SCMD_TLS_FRAG_ENABLE(0); if (tlsp->inline_key) tlsp->scmd0.ivgen_hdrlen |= V_SCMD_KEY_CTX_INLINE(1); tlsp->scmd0.ivgen_hdrlen = htobe32(tlsp->scmd0.ivgen_hdrlen); /* * The SCMD fields used when encrypting a partial TLS record * (no trailer and possibly a truncated payload). */ tlsp->scmd0_short.seqno_numivs = V_SCMD_SEQ_NO_CTRL(0) | V_SCMD_PROTO_VERSION(SCMD_PROTO_VERSION_GENERIC) | V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) | V_SCMD_CIPH_AUTH_SEQ_CTRL((tlsp->mac_first == 0)) | V_SCMD_AUTH_MODE(SCMD_AUTH_MODE_NOP) | V_SCMD_HMAC_CTRL(SCMD_HMAC_CTRL_NOP) | V_SCMD_IV_SIZE(AES_BLOCK_LEN / 2) | V_SCMD_NUM_IVS(0); if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) tlsp->scmd0_short.seqno_numivs |= V_SCMD_CIPH_MODE(SCMD_CIPH_MODE_AES_CTR); else tlsp->scmd0_short.seqno_numivs |= V_SCMD_CIPH_MODE(tlsp->enc_mode); tlsp->scmd0_short.seqno_numivs = htobe32(tlsp->scmd0_short.seqno_numivs); tlsp->scmd0_short.ivgen_hdrlen = V_SCMD_IV_GEN_CTRL(0) | V_SCMD_TLS_FRAG_ENABLE(0) | V_SCMD_AADIVDROP(1); if (tlsp->inline_key) tlsp->scmd0_short.ivgen_hdrlen |= V_SCMD_KEY_CTX_INLINE(1); TXQ_LOCK(txq); if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) txq->kern_tls_gcm++; else txq->kern_tls_cbc++; TXQ_UNLOCK(txq); *pt = &tlsp->com.com; return (0); failed: if (atid >= 0) free_atid(sc, atid); m_snd_tag_rele(&tlsp->com.com); return (error); } static int ktls_setup_keys(struct tlspcb *tlsp, const struct ktls_session *tls, struct sge_txq *txq) { struct auth_hash *axf; int error, keyid, kwrlen, kctxlen, len; struct tls_key_req *kwr; struct tls_keyctx *kctx; void *items[1], *key; struct tx_keyctx_hdr *khdr; unsigned int ck_size, mk_size, partial_digest_len; struct mbuf *m; /* * Store the salt and keys in the key context. For * connections with an inline key, this key context is passed * as immediate data in each work request. For connections * storing the key in DDR, a work request is used to store a * copy of the key context in DDR. */ kctx = &tlsp->keyctx; khdr = &kctx->txhdr; switch (tlsp->cipher_secret_size) { case 128 / 8: ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; break; case 192 / 8: ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192; break; case 256 / 8: ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; break; default: panic("bad key size"); } axf = NULL; partial_digest_len = 0; if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_512; else { switch (tlsp->auth_mode) { case SCMD_AUTH_MODE_SHA1: axf = &auth_hash_hmac_sha1; mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_160; partial_digest_len = SHA1_HASH_LEN; break; case SCMD_AUTH_MODE_SHA256: axf = &auth_hash_hmac_sha2_256; mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256; partial_digest_len = SHA2_256_HASH_LEN; break; case SCMD_AUTH_MODE_SHA512_384: axf = &auth_hash_hmac_sha2_384; mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_512; partial_digest_len = SHA2_512_HASH_LEN; break; default: panic("bad auth mode"); } } khdr->ctxlen = (tlsp->tx_key_info_size >> 4); khdr->dualck_to_txvalid = V_TLS_KEYCTX_TX_WR_SALT_PRESENT(1) | V_TLS_KEYCTX_TX_WR_TXCK_SIZE(ck_size) | V_TLS_KEYCTX_TX_WR_TXMK_SIZE(mk_size) | V_TLS_KEYCTX_TX_WR_TXVALID(1); if (tlsp->enc_mode != SCMD_CIPH_MODE_AES_GCM) khdr->dualck_to_txvalid |= V_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(1); khdr->dualck_to_txvalid = htobe16(khdr->dualck_to_txvalid); key = kctx->keys.edkey; memcpy(key, tls->params.cipher_key, tls->params.cipher_key_len); if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) { memcpy(khdr->txsalt, tls->params.iv, SALT_SIZE); t4_init_gmac_hash(tls->params.cipher_key, tls->params.cipher_key_len, (char *)key + tls->params.cipher_key_len); } else { t4_init_hmac_digest(axf, partial_digest_len, tls->params.auth_key, tls->params.auth_key_len, (char *)key + tls->params.cipher_key_len); } if (tlsp->inline_key) return (0); keyid = tlsp->tx_key_addr; /* Populate key work request. */ kwrlen = sizeof(*kwr); kctxlen = roundup2(sizeof(*kctx), 32); len = kwrlen + kctxlen; m = alloc_wr_mbuf(len, M_NOWAIT); if (m == NULL) { CTR2(KTR_CXGBE, "%s: tid %d failed to alloc WR mbuf", __func__, tlsp->tid); return (ENOMEM); } m->m_pkthdr.snd_tag = m_snd_tag_ref(&tlsp->com.com); m->m_pkthdr.csum_flags |= CSUM_SND_TAG; kwr = mtod(m, void *); memset(kwr, 0, len); kwr->wr_hi = htobe32(V_FW_WR_OP(FW_ULPTX_WR) | F_FW_WR_ATOMIC); kwr->wr_mid = htobe32(V_FW_WR_LEN16(DIV_ROUND_UP(len, 16))); kwr->protocol = tlsp->proto_ver; kwr->mfs = htons(tlsp->frag_size); kwr->reneg_to_write_rx = KEY_WRITE_TX; /* master command */ kwr->cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE) | V_T5_ULP_MEMIO_ORDER(1) | V_T5_ULP_MEMIO_IMM(1)); kwr->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(kctxlen >> 5)); kwr->len16 = htobe32((tlsp->tid << 8) | DIV_ROUND_UP(len - sizeof(struct work_request_hdr), 16)); kwr->kaddr = htobe32(V_ULP_MEMIO_ADDR(keyid >> 5)); /* sub command */ kwr->sc_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); kwr->sc_len = htobe32(kctxlen); kctx = (struct tls_keyctx *)(kwr + 1); memcpy(kctx, &tlsp->keyctx, sizeof(*kctx)); /* * Place the key work request in the transmit queue. It * should be sent to the NIC before any TLS packets using this * session. */ items[0] = m; error = mp_ring_enqueue(txq->r, items, 1, 1); if (error) m_free(m); else CTR2(KTR_CXGBE, "%s: tid %d sent key WR", __func__, tlsp->tid); return (error); } static u_int ktls_base_wr_size(struct tlspcb *tlsp) { u_int wr_len; wr_len = sizeof(struct fw_ulptx_wr); // 16 wr_len += sizeof(struct ulp_txpkt); // 8 wr_len += sizeof(struct ulptx_idata); // 8 wr_len += sizeof(struct cpl_tx_sec_pdu);// 32 if (tlsp->inline_key) wr_len += tlsp->tx_key_info_size; else { wr_len += sizeof(struct ulptx_sc_memrd);// 8 wr_len += sizeof(struct ulptx_idata); // 8 } wr_len += sizeof(struct cpl_tx_data); // 16 return (wr_len); } /* How many bytes of TCP payload to send for a given TLS record. */ static u_int ktls_tcp_payload_length(struct tlspcb *tlsp, struct mbuf *m_tls) { struct tls_record_layer *hdr; u_int plen, mlen; M_ASSERTEXTPG(m_tls); hdr = (void *)m_tls->m_epg_hdr; plen = ntohs(hdr->tls_length); /* * What range of the TLS record is the mbuf requesting to be * sent. */ mlen = mtod(m_tls, vm_offset_t) + m_tls->m_len; /* Always send complete records. */ if (mlen == TLS_HEADER_LENGTH + plen) return (mlen); /* * If the host stack has asked to send part of the trailer, * trim the length to avoid sending any of the trailer. There * is no way to send a partial trailer currently. */ if (mlen > TLS_HEADER_LENGTH + plen - m_tls->m_epg_trllen) mlen = TLS_HEADER_LENGTH + plen - m_tls->m_epg_trllen; /* * For AES-CBC adjust the ciphertext length for the block * size. */ if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_CBC && mlen > TLS_HEADER_LENGTH) { mlen = TLS_HEADER_LENGTH + rounddown(mlen - TLS_HEADER_LENGTH, AES_BLOCK_LEN); } #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %d short TLS record (%u vs %u)", __func__, tlsp->tid, mlen, TLS_HEADER_LENGTH + plen); #endif return (mlen); } /* * For a "short" TLS record, determine the offset into the TLS record * payload to send. This offset does not include the TLS header, but * a non-zero offset implies that a header will not be sent. */ static u_int ktls_payload_offset(struct tlspcb *tlsp, struct mbuf *m_tls) { struct tls_record_layer *hdr; u_int offset, plen; #ifdef INVARIANTS u_int mlen; #endif M_ASSERTEXTPG(m_tls); hdr = (void *)m_tls->m_epg_hdr; plen = ntohs(hdr->tls_length); #ifdef INVARIANTS mlen = mtod(m_tls, vm_offset_t) + m_tls->m_len; MPASS(mlen < TLS_HEADER_LENGTH + plen); #endif if (mtod(m_tls, vm_offset_t) <= m_tls->m_epg_hdrlen) return (0); if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) { /* * Always send something. This function is only called * if we aren't sending the tag at all, but if the * request starts in the tag then we are in an odd * state where would effectively send nothing. Cap * the offset at the last byte of the record payload * to send the last cipher block. */ offset = min(mtod(m_tls, vm_offset_t) - m_tls->m_epg_hdrlen, (plen - TLS_HEADER_LENGTH - m_tls->m_epg_trllen) - 1); return (rounddown(offset, AES_BLOCK_LEN)); } return (0); } static u_int ktls_sgl_size(u_int nsegs) { u_int wr_len; /* First segment is part of ulptx_sgl. */ nsegs--; wr_len = sizeof(struct ulptx_sgl); wr_len += 8 * ((3 * nsegs) / 2 + (nsegs & 1)); return (wr_len); } static int ktls_wr_len(struct tlspcb *tlsp, struct mbuf *m, struct mbuf *m_tls, int *nsegsp) { struct tls_record_layer *hdr; u_int imm_len, offset, plen, wr_len, tlen; M_ASSERTEXTPG(m_tls); /* * Determine the size of the TLS record payload to send * excluding header and trailer. */ tlen = ktls_tcp_payload_length(tlsp, m_tls); if (tlen <= m_tls->m_epg_hdrlen) { /* * For requests that only want to send the TLS header, * send a tunnelled packet as immediate data. */ wr_len = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) + roundup2(m->m_len + m_tls->m_len, 16); if (wr_len > SGE_MAX_WR_LEN) { CTR3(KTR_CXGBE, "%s: tid %d TLS header-only packet too long (len %d)", __func__, tlsp->tid, m->m_len + m_tls->m_len); } /* This should always be the last TLS record in a chain. */ MPASS(m_tls->m_next == NULL); /* * XXX: Set a bogus 'nsegs' value to avoid tripping an * assertion in mbuf_nsegs() in t4_sge.c. */ *nsegsp = 1; return (wr_len); } hdr = (void *)m_tls->m_epg_hdr; plen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - m_tls->m_epg_trllen; if (tlen < plen) { plen = tlen; offset = ktls_payload_offset(tlsp, m_tls); } else offset = 0; /* Calculate the size of the work request. */ wr_len = ktls_base_wr_size(tlsp); /* * Full records and short records with an offset of 0 include * the TLS header as immediate data. Short records include a * raw AES IV as immediate data. */ imm_len = 0; if (offset == 0) imm_len += m_tls->m_epg_hdrlen; if (plen == tlen) imm_len += AES_BLOCK_LEN; wr_len += roundup2(imm_len, 16); /* TLS record payload via DSGL. */ *nsegsp = sglist_count_mbuf_epg(m_tls, m_tls->m_epg_hdrlen + offset, plen - (m_tls->m_epg_hdrlen + offset)); wr_len += ktls_sgl_size(*nsegsp); wr_len = roundup2(wr_len, 16); return (wr_len); } /* * See if we have any TCP options requiring a dedicated options-only * packet. */ static int ktls_has_tcp_options(struct tcphdr *tcp) { u_char *cp; int cnt, opt, optlen; cp = (u_char *)(tcp + 1); cnt = tcp->th_off * 4 - sizeof(struct tcphdr); for (; cnt > 0; cnt -= optlen, cp += optlen) { opt = cp[0]; if (opt == TCPOPT_EOL) break; if (opt == TCPOPT_NOP) optlen = 1; else { if (cnt < 2) break; optlen = cp[1]; if (optlen < 2 || optlen > cnt) break; } switch (opt) { case TCPOPT_NOP: case TCPOPT_TIMESTAMP: break; default: return (1); } } return (0); } /* * Find the TCP timestamp option. */ static void * ktls_find_tcp_timestamps(struct tcphdr *tcp) { u_char *cp; int cnt, opt, optlen; cp = (u_char *)(tcp + 1); cnt = tcp->th_off * 4 - sizeof(struct tcphdr); for (; cnt > 0; cnt -= optlen, cp += optlen) { opt = cp[0]; if (opt == TCPOPT_EOL) break; if (opt == TCPOPT_NOP) optlen = 1; else { if (cnt < 2) break; optlen = cp[1]; if (optlen < 2 || optlen > cnt) break; } if (opt == TCPOPT_TIMESTAMP && optlen == TCPOLEN_TIMESTAMP) return (cp + 2); } return (NULL); } int t6_ktls_parse_pkt(struct mbuf *m, int *nsegsp, int *len16p) { struct tlspcb *tlsp; struct ether_header *eh; struct ip *ip; struct ip6_hdr *ip6; struct tcphdr *tcp; struct mbuf *m_tls; int nsegs; u_int wr_len, tot_len; /* * Locate headers in initial mbuf. * * XXX: This assumes all of the headers are in the initial mbuf. * Could perhaps use m_advance() like parse_pkt() if that turns * out to not be true. */ M_ASSERTPKTHDR(m); MPASS(m->m_pkthdr.snd_tag != NULL); tlsp = mst_to_tls(m->m_pkthdr.snd_tag); if (m->m_len <= sizeof(*eh) + sizeof(*ip)) { CTR2(KTR_CXGBE, "%s: tid %d header mbuf too short", __func__, tlsp->tid); return (EINVAL); } eh = mtod(m, struct ether_header *); if (ntohs(eh->ether_type) != ETHERTYPE_IP && ntohs(eh->ether_type) != ETHERTYPE_IPV6) { CTR2(KTR_CXGBE, "%s: tid %d mbuf not ETHERTYPE_IP{,V6}", __func__, tlsp->tid); return (EINVAL); } m->m_pkthdr.l2hlen = sizeof(*eh); /* XXX: Reject unsupported IP options? */ if (ntohs(eh->ether_type) == ETHERTYPE_IP) { ip = (struct ip *)(eh + 1); if (ip->ip_p != IPPROTO_TCP) { CTR2(KTR_CXGBE, "%s: tid %d mbuf not IPPROTO_TCP", __func__, tlsp->tid); return (EINVAL); } m->m_pkthdr.l3hlen = ip->ip_hl * 4; } else { ip6 = (struct ip6_hdr *)(eh + 1); if (ip6->ip6_nxt != IPPROTO_TCP) { CTR3(KTR_CXGBE, "%s: tid %d mbuf not IPPROTO_TCP (%u)", __func__, tlsp->tid, ip6->ip6_nxt); return (EINVAL); } m->m_pkthdr.l3hlen = sizeof(struct ip6_hdr); } if (m->m_len < m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + sizeof(*tcp)) { CTR2(KTR_CXGBE, "%s: tid %d header mbuf too short (2)", __func__, tlsp->tid); return (EINVAL); } tcp = (struct tcphdr *)((char *)(eh + 1) + m->m_pkthdr.l3hlen); m->m_pkthdr.l4hlen = tcp->th_off * 4; /* Bail if there is TCP payload before the TLS record. */ if (m->m_len != m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + m->m_pkthdr.l4hlen) { CTR6(KTR_CXGBE, "%s: tid %d header mbuf bad length (%d + %d + %d != %d)", __func__, tlsp->tid, m->m_pkthdr.l2hlen, m->m_pkthdr.l3hlen, m->m_pkthdr.l4hlen, m->m_len); return (EINVAL); } /* Assume all headers are in 'm' for now. */ MPASS(m->m_next != NULL); MPASS(m->m_next->m_flags & M_EXTPG); tot_len = 0; /* * Each of the remaining mbufs in the chain should reference a * TLS record. */ *nsegsp = 0; for (m_tls = m->m_next; m_tls != NULL; m_tls = m_tls->m_next) { MPASS(m_tls->m_flags & M_EXTPG); wr_len = ktls_wr_len(tlsp, m, m_tls, &nsegs); #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %d wr_len %d nsegs %d", __func__, tlsp->tid, wr_len, nsegs); #endif if (wr_len > SGE_MAX_WR_LEN || nsegs > TX_SGL_SEGS) return (EFBIG); tot_len += roundup2(wr_len, EQ_ESIZE); /* * Store 'nsegs' for the first TLS record in the * header mbuf's metadata. */ if (*nsegsp == 0) *nsegsp = nsegs; } MPASS(tot_len != 0); /* * See if we have any TCP options or a FIN requiring a * dedicated packet. */ if ((tcp->th_flags & TH_FIN) != 0 || ktls_has_tcp_options(tcp)) { wr_len = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) + roundup2(m->m_len, 16); if (wr_len > SGE_MAX_WR_LEN) { CTR3(KTR_CXGBE, "%s: tid %d options-only packet too long (len %d)", __func__, tlsp->tid, m->m_len); return (EINVAL); } tot_len += roundup2(wr_len, EQ_ESIZE); } /* Include room for a TP work request to program an L2T entry. */ tot_len += EQ_ESIZE; /* * Include room for a ULPTX work request including up to 5 * CPL_SET_TCB_FIELD commands before the first TLS work * request. */ wr_len = sizeof(struct fw_ulptx_wr) + 5 * roundup2(LEN__SET_TCB_FIELD_ULP, 16); /* * If timestamps are present, reserve 1 more command for * setting the echoed timestamp. */ if (tlsp->using_timestamps) wr_len += roundup2(LEN__SET_TCB_FIELD_ULP, 16); tot_len += roundup2(wr_len, EQ_ESIZE); *len16p = tot_len / 16; #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %d len16 %d nsegs %d", __func__, tlsp->tid, *len16p, *nsegsp); #endif return (0); } /* * If the SGL ends on an address that is not 16 byte aligned, this function will * add a 0 filled flit at the end. */ static void write_gl_to_buf(struct sglist *gl, caddr_t to) { struct sglist_seg *seg; __be64 *flitp; struct ulptx_sgl *usgl; int i, nflits, nsegs; KASSERT(((uintptr_t)to & 0xf) == 0, ("%s: SGL must start at a 16 byte boundary: %p", __func__, to)); nsegs = gl->sg_nseg; MPASS(nsegs > 0); nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2; flitp = (__be64 *)to; seg = &gl->sg_segs[0]; usgl = (void *)flitp; usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | V_ULPTX_NSGE(nsegs)); usgl->len0 = htobe32(seg->ss_len); usgl->addr0 = htobe64(seg->ss_paddr); seg++; for (i = 0; i < nsegs - 1; i++, seg++) { usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len); usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr); } if (i & 1) usgl->sge[i / 2].len[1] = htobe32(0); flitp += nflits; if (nflits & 1) { MPASS(((uintptr_t)flitp) & 0xf); *flitp++ = 0; } MPASS((((uintptr_t)flitp) & 0xf) == 0); } static inline void copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) { MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); if (__predict_true((uintptr_t)(*to) + len <= (uintptr_t)&eq->desc[eq->sidx])) { bcopy(from, *to, len); (*to) += len; if ((uintptr_t)(*to) == (uintptr_t)&eq->desc[eq->sidx]) (*to) = (caddr_t)eq->desc; } else { int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to); bcopy(from, *to, portion); from += portion; portion = len - portion; /* remaining */ bcopy(from, (void *)eq->desc, portion); (*to) = (caddr_t)eq->desc + portion; } } static int ktls_write_tcp_options(struct sge_txq *txq, void *dst, struct mbuf *m, u_int available, u_int pidx) { struct tx_sdesc *txsd; struct fw_eth_tx_pkt_wr *wr; struct cpl_tx_pkt_core *cpl; uint32_t ctrl; uint64_t ctrl1; int len16, ndesc, pktlen; struct ether_header *eh; struct ip *ip, newip; struct ip6_hdr *ip6, newip6; struct tcphdr *tcp, newtcp; caddr_t out; TXQ_LOCK_ASSERT_OWNED(txq); M_ASSERTPKTHDR(m); wr = dst; pktlen = m->m_len; ctrl = sizeof(struct cpl_tx_pkt_core) + pktlen; len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + ctrl, 16); ndesc = tx_len16_to_desc(len16); MPASS(ndesc <= available); /* Firmware work request header */ wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); ctrl = V_FW_WR_LEN16(len16); wr->equiq_to_len16 = htobe32(ctrl); wr->r3 = 0; cpl = (void *)(wr + 1); /* CPL header */ cpl->ctrl0 = txq->cpl_ctrl0; cpl->pack = 0; cpl->len = htobe16(pktlen); out = (void *)(cpl + 1); /* Copy over Ethernet header. */ eh = mtod(m, struct ether_header *); copy_to_txd(&txq->eq, (caddr_t)eh, &out, m->m_pkthdr.l2hlen); /* Fixup length in IP header and copy out. */ if (ntohs(eh->ether_type) == ETHERTYPE_IP) { ip = (void *)((char *)eh + m->m_pkthdr.l2hlen); newip = *ip; newip.ip_len = htons(pktlen - m->m_pkthdr.l2hlen); copy_to_txd(&txq->eq, (caddr_t)&newip, &out, sizeof(newip)); if (m->m_pkthdr.l3hlen > sizeof(*ip)) copy_to_txd(&txq->eq, (caddr_t)(ip + 1), &out, m->m_pkthdr.l3hlen - sizeof(*ip)); ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP) | V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) | V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen); } else { ip6 = (void *)((char *)eh + m->m_pkthdr.l2hlen); newip6 = *ip6; newip6.ip6_plen = htons(pktlen - m->m_pkthdr.l2hlen); copy_to_txd(&txq->eq, (caddr_t)&newip6, &out, sizeof(newip6)); MPASS(m->m_pkthdr.l3hlen == sizeof(*ip6)); ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP6) | V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) | V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen); } cpl->ctrl1 = htobe64(ctrl1); txq->txcsum++; /* Clear PUSH and FIN in the TCP header if present. */ tcp = (void *)((char *)eh + m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen); newtcp = *tcp; newtcp.th_flags &= ~(TH_PUSH | TH_FIN); copy_to_txd(&txq->eq, (caddr_t)&newtcp, &out, sizeof(newtcp)); /* Copy rest of packet. */ copy_to_txd(&txq->eq, (caddr_t)(tcp + 1), &out, pktlen - (m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + sizeof(*tcp))); txq->imm_wrs++; txq->txpkt_wrs++; txq->kern_tls_options++; txsd = &txq->sdesc[pidx]; txsd->m = NULL; txsd->desc_used = ndesc; return (ndesc); } static int ktls_write_tunnel_packet(struct sge_txq *txq, void *dst, struct mbuf *m, struct mbuf *m_tls, u_int available, tcp_seq tcp_seqno, u_int pidx) { struct tx_sdesc *txsd; struct fw_eth_tx_pkt_wr *wr; struct cpl_tx_pkt_core *cpl; uint32_t ctrl; uint64_t ctrl1; int len16, ndesc, pktlen; struct ether_header *eh; struct ip *ip, newip; struct ip6_hdr *ip6, newip6; struct tcphdr *tcp, newtcp; caddr_t out; TXQ_LOCK_ASSERT_OWNED(txq); M_ASSERTPKTHDR(m); /* Locate the template TLS header. */ M_ASSERTEXTPG(m_tls); /* This should always be the last TLS record in a chain. */ MPASS(m_tls->m_next == NULL); wr = dst; pktlen = m->m_len + m_tls->m_len; ctrl = sizeof(struct cpl_tx_pkt_core) + pktlen; len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + ctrl, 16); ndesc = tx_len16_to_desc(len16); MPASS(ndesc <= available); /* Firmware work request header */ wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); ctrl = V_FW_WR_LEN16(len16); wr->equiq_to_len16 = htobe32(ctrl); wr->r3 = 0; cpl = (void *)(wr + 1); /* CPL header */ cpl->ctrl0 = txq->cpl_ctrl0; cpl->pack = 0; cpl->len = htobe16(pktlen); out = (void *)(cpl + 1); /* Copy over Ethernet header. */ eh = mtod(m, struct ether_header *); copy_to_txd(&txq->eq, (caddr_t)eh, &out, m->m_pkthdr.l2hlen); /* Fixup length in IP header and copy out. */ if (ntohs(eh->ether_type) == ETHERTYPE_IP) { ip = (void *)((char *)eh + m->m_pkthdr.l2hlen); newip = *ip; newip.ip_len = htons(pktlen - m->m_pkthdr.l2hlen); copy_to_txd(&txq->eq, (caddr_t)&newip, &out, sizeof(newip)); if (m->m_pkthdr.l3hlen > sizeof(*ip)) copy_to_txd(&txq->eq, (caddr_t)(ip + 1), &out, m->m_pkthdr.l3hlen - sizeof(*ip)); ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP) | V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) | V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen); } else { ip6 = (void *)((char *)eh + m->m_pkthdr.l2hlen); newip6 = *ip6; newip6.ip6_plen = htons(pktlen - m->m_pkthdr.l2hlen); copy_to_txd(&txq->eq, (caddr_t)&newip6, &out, sizeof(newip6)); MPASS(m->m_pkthdr.l3hlen == sizeof(*ip6)); ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP6) | V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) | V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen); } cpl->ctrl1 = htobe64(ctrl1); txq->txcsum++; /* Set sequence number in TCP header. */ tcp = (void *)((char *)eh + m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen); newtcp = *tcp; newtcp.th_seq = htonl(tcp_seqno + mtod(m_tls, vm_offset_t)); copy_to_txd(&txq->eq, (caddr_t)&newtcp, &out, sizeof(newtcp)); /* Copy rest of TCP header. */ copy_to_txd(&txq->eq, (caddr_t)(tcp + 1), &out, m->m_len - (m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + sizeof(*tcp))); /* Copy the subset of the TLS header requested. */ copy_to_txd(&txq->eq, (char *)m_tls->m_epg_hdr + mtod(m_tls, vm_offset_t), &out, m_tls->m_len); txq->imm_wrs++; txq->txpkt_wrs++; txq->kern_tls_header++; txsd = &txq->sdesc[pidx]; txsd->m = m; txsd->desc_used = ndesc; return (ndesc); } _Static_assert(sizeof(struct cpl_set_tcb_field) <= EQ_ESIZE, "CPL_SET_TCB_FIELD must be smaller than a single TX descriptor"); _Static_assert(W_TCB_SND_UNA_RAW == W_TCB_SND_NXT_RAW, "SND_NXT_RAW and SND_UNA_RAW are in different words"); static int ktls_write_tls_wr(struct tlspcb *tlsp, struct sge_txq *txq, void *dst, struct mbuf *m, struct tcphdr *tcp, struct mbuf *m_tls, u_int nsegs, u_int available, tcp_seq tcp_seqno, uint32_t *tsopt, u_int pidx, bool set_l2t_idx) { struct sge_eq *eq = &txq->eq; struct tx_sdesc *txsd; struct fw_ulptx_wr *wr; struct ulp_txpkt *txpkt; struct ulptx_sc_memrd *memrd; struct ulptx_idata *idata; struct cpl_tx_sec_pdu *sec_pdu; struct cpl_tx_data *tx_data; struct tls_record_layer *hdr; char *iv, *out; u_int aad_start, aad_stop; u_int auth_start, auth_stop, auth_insert; u_int cipher_start, cipher_stop, iv_offset; u_int imm_len, mss, ndesc, offset, plen, tlen, twr_len, wr_len; u_int fields, tx_max_offset, tx_max; bool first_wr, last_wr, using_scratch; ndesc = 0; MPASS(tlsp->txq == txq); first_wr = (tlsp->prev_seq == 0 && tlsp->prev_ack == 0 && tlsp->prev_win == 0); /* * Use the per-txq scratch pad if near the end of the ring to * simplify handling of wrap-around. This uses a simple but * not quite perfect test of using the scratch buffer if we * can't fit a maximal work request in without wrapping. */ using_scratch = (eq->sidx - pidx < SGE_MAX_WR_LEN / EQ_ESIZE); /* Locate the TLS header. */ M_ASSERTEXTPG(m_tls); hdr = (void *)m_tls->m_epg_hdr; plen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - m_tls->m_epg_trllen; /* Determine how much of the TLS record to send. */ tlen = ktls_tcp_payload_length(tlsp, m_tls); if (tlen <= m_tls->m_epg_hdrlen) { /* * For requests that only want to send the TLS header, * send a tunnelled packet as immediate data. */ #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: tid %d header-only TLS record %u", __func__, tlsp->tid, (u_int)m_tls->m_epg_seqno); #endif return (ktls_write_tunnel_packet(txq, dst, m, m_tls, available, tcp_seqno, pidx)); } if (tlen < plen) { plen = tlen; offset = ktls_payload_offset(tlsp, m_tls); #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %d short TLS record %u with offset %u", __func__, tlsp->tid, (u_int)m_tls->m_epg_seqno, offset); #endif if (m_tls->m_next == NULL && (tcp->th_flags & TH_FIN) != 0) { txq->kern_tls_fin_short++; #ifdef INVARIANTS panic("%s: FIN on short TLS record", __func__); #endif } } else offset = 0; /* * This is the last work request for a given TLS mbuf chain if * it is the last mbuf in the chain and FIN is not set. If * FIN is set, then ktls_write_tcp_fin() will write out the * last work request. */ last_wr = m_tls->m_next == NULL && (tcp->th_flags & TH_FIN) == 0; /* * The host stack may ask us to not send part of the start of * a TLS record. (For example, the stack might have * previously sent a "short" TLS record and might later send * down an mbuf that requests to send the remainder of the TLS * record.) The crypto engine must process a TLS record from * the beginning if computing a GCM tag or HMAC, so we always * send the TLS record from the beginning as input to the * crypto engine and via CPL_TX_DATA to TP. However, TP will * drop individual packets after they have been chopped up * into MSS-sized chunks if the entire sequence range of those * packets is less than SND_UNA. SND_UNA is computed as * TX_MAX - SND_UNA_RAW. Thus, use the offset stored in * m_data to set TX_MAX to the first byte in the TCP sequence * space the host actually wants us to send and set * SND_UNA_RAW to 0. * * If the host sends us back to back requests that span the * trailer of a single TLS record (first request ends "in" the * trailer and second request starts at the next byte but * still "in" the trailer), the initial bytes of the trailer * that the first request drops will not be retransmitted. If * the host uses the same requests when retransmitting the * connection will hang. To handle this, always transmit the * full trailer for a request that begins "in" the trailer * (the second request in the example above). This should * also help to avoid retransmits for the common case. * * A similar condition exists when using CBC for back to back * requests that span a single AES block. The first request * will be truncated to end at the end of the previous AES * block. To handle this, always begin transmission at the * start of the current AES block. */ tx_max_offset = mtod(m_tls, vm_offset_t); if (tx_max_offset > TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - m_tls->m_epg_trllen) { /* Always send the full trailer. */ tx_max_offset = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - m_tls->m_epg_trllen; } if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_CBC && tx_max_offset > TLS_HEADER_LENGTH) { /* Always send all of the first AES block. */ tx_max_offset = TLS_HEADER_LENGTH + rounddown(tx_max_offset - TLS_HEADER_LENGTH, AES_BLOCK_LEN); } tx_max = tcp_seqno + tx_max_offset; /* * Update TCB fields. Reserve space for the FW_ULPTX_WR header * but don't populate it until we know how many field updates * are required. */ if (using_scratch) wr = (void *)txq->ss; else wr = dst; out = (void *)(wr + 1); fields = 0; if (set_l2t_idx) { KASSERT(nsegs != 0, ("trying to set L2T_IX for subsequent TLS WR")); #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: tid %d set L2T_IX to %d", __func__, tlsp->tid, tlsp->l2te->idx); #endif write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_L2T_IX, V_TCB_L2T_IX(M_TCB_L2T_IX), V_TCB_L2T_IX(tlsp->l2te->idx)); out += roundup2(LEN__SET_TCB_FIELD_ULP, 16); fields++; } if (tsopt != NULL && tlsp->prev_tsecr != ntohl(tsopt[1])) { KASSERT(nsegs != 0, ("trying to set T_RTSEQ_RECENT for subsequent TLS WR")); #ifdef VERBOSE_TRACES CTR2(KTR_CXGBE, "%s: tid %d wrote updated T_RTSEQ_RECENT", __func__, tlsp->tid); #endif write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_T_RTSEQ_RECENT, V_TCB_T_RTSEQ_RECENT(M_TCB_T_RTSEQ_RECENT), V_TCB_T_RTSEQ_RECENT(ntohl(tsopt[1]))); out += roundup2(LEN__SET_TCB_FIELD_ULP, 16); fields++; tlsp->prev_tsecr = ntohl(tsopt[1]); } if (first_wr || tlsp->prev_seq != tx_max) { KASSERT(nsegs != 0, ("trying to set TX_MAX for subsequent TLS WR")); #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %d setting TX_MAX to %u (tcp_seqno %u)", __func__, tlsp->tid, tx_max, tcp_seqno); #endif write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_TX_MAX, V_TCB_TX_MAX(M_TCB_TX_MAX), V_TCB_TX_MAX(tx_max)); out += roundup2(LEN__SET_TCB_FIELD_ULP, 16); fields++; } /* * If there is data to drop at the beginning of this TLS * record or if this is a retransmit, * reset SND_UNA_RAW to 0 so that SND_UNA == TX_MAX. */ if (tlsp->prev_seq != tx_max || mtod(m_tls, vm_offset_t) != 0) { KASSERT(nsegs != 0, ("trying to clear SND_UNA_RAW for subsequent TLS WR")); #ifdef VERBOSE_TRACES CTR2(KTR_CXGBE, "%s: tid %d clearing SND_UNA_RAW", __func__, tlsp->tid); #endif write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_SND_UNA_RAW, V_TCB_SND_UNA_RAW(M_TCB_SND_UNA_RAW), V_TCB_SND_UNA_RAW(0)); out += roundup2(LEN__SET_TCB_FIELD_ULP, 16); fields++; } /* * Store the expected sequence number of the next byte after * this record. */ tlsp->prev_seq = tcp_seqno + tlen; if (first_wr || tlsp->prev_ack != ntohl(tcp->th_ack)) { KASSERT(nsegs != 0, ("trying to set RCV_NXT for subsequent TLS WR")); write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_RCV_NXT, V_TCB_RCV_NXT(M_TCB_RCV_NXT), V_TCB_RCV_NXT(ntohl(tcp->th_ack))); out += roundup2(LEN__SET_TCB_FIELD_ULP, 16); fields++; tlsp->prev_ack = ntohl(tcp->th_ack); } if (first_wr || tlsp->prev_win != ntohs(tcp->th_win)) { KASSERT(nsegs != 0, ("trying to set RCV_WND for subsequent TLS WR")); write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_RCV_WND, V_TCB_RCV_WND(M_TCB_RCV_WND), V_TCB_RCV_WND(ntohs(tcp->th_win))); out += roundup2(LEN__SET_TCB_FIELD_ULP, 16); fields++; tlsp->prev_win = ntohs(tcp->th_win); } /* Recalculate 'nsegs' if cached value is not available. */ if (nsegs == 0) nsegs = sglist_count_mbuf_epg(m_tls, m_tls->m_epg_hdrlen + offset, plen - (m_tls->m_epg_hdrlen + offset)); /* Calculate the size of the TLS work request. */ twr_len = ktls_base_wr_size(tlsp); imm_len = 0; if (offset == 0) imm_len += m_tls->m_epg_hdrlen; if (plen == tlen) imm_len += AES_BLOCK_LEN; twr_len += roundup2(imm_len, 16); twr_len += ktls_sgl_size(nsegs); /* * If any field updates were required, determine if they can * be included in the TLS work request. If not, use the * FW_ULPTX_WR work request header at 'wr' as a dedicated work * request for the field updates and start a new work request * for the TLS work request afterward. */ if (fields != 0) { wr_len = fields * roundup2(LEN__SET_TCB_FIELD_ULP, 16); if (twr_len + wr_len <= SGE_MAX_WR_LEN && tlsp->sc->tlst.combo_wrs) { wr_len += twr_len; txpkt = (void *)out; } else { wr_len += sizeof(*wr); wr->op_to_compl = htobe32(V_FW_WR_OP(FW_ULPTX_WR)); wr->flowid_len16 = htobe32(F_FW_ULPTX_WR_DATA | V_FW_WR_LEN16(wr_len / 16)); wr->cookie = 0; /* * If we were using scratch space, copy the * field updates work request to the ring. */ if (using_scratch) { out = dst; copy_to_txd(eq, txq->ss, &out, wr_len); } ndesc = howmany(wr_len, EQ_ESIZE); MPASS(ndesc <= available); txq->raw_wrs++; txsd = &txq->sdesc[pidx]; txsd->m = NULL; txsd->desc_used = ndesc; IDXINCR(pidx, ndesc, eq->sidx); dst = &eq->desc[pidx]; /* * Determine if we should use scratch space * for the TLS work request based on the * available space after advancing pidx for * the field updates work request. */ wr_len = twr_len; using_scratch = (eq->sidx - pidx < howmany(wr_len, EQ_ESIZE)); if (using_scratch) wr = (void *)txq->ss; else wr = dst; txpkt = (void *)(wr + 1); } } else { wr_len = twr_len; txpkt = (void *)out; } wr_len = roundup2(wr_len, 16); MPASS(ndesc + howmany(wr_len, EQ_ESIZE) <= available); /* FW_ULPTX_WR */ wr->op_to_compl = htobe32(V_FW_WR_OP(FW_ULPTX_WR)); wr->flowid_len16 = htobe32(F_FW_ULPTX_WR_DATA | V_FW_WR_LEN16(wr_len / 16)); wr->cookie = 0; /* ULP_TXPKT */ txpkt->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DATAMODIFY(0) | V_ULP_TXPKT_CHANNELID(tlsp->vi->pi->port_id) | V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(txq->eq.cntxt_id) | V_ULP_TXPKT_RO(1)); txpkt->len = htobe32(howmany(twr_len - sizeof(*wr), 16)); /* ULPTX_IDATA sub-command */ idata = (void *)(txpkt + 1); idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | V_ULP_TX_SC_MORE(1)); idata->len = sizeof(struct cpl_tx_sec_pdu); /* * The key context, CPL_TX_DATA, and immediate data are part * of this ULPTX_IDATA when using an inline key. When reading * the key from memory, the CPL_TX_DATA and immediate data are * part of a separate ULPTX_IDATA. */ if (tlsp->inline_key) idata->len += tlsp->tx_key_info_size + sizeof(struct cpl_tx_data) + imm_len; idata->len = htobe32(idata->len); /* CPL_TX_SEC_PDU */ sec_pdu = (void *)(idata + 1); /* * For short records, AAD is counted as header data in SCMD0, * the IV is next followed by a cipher region for the payload. */ if (plen == tlen) { aad_start = 0; aad_stop = 0; iv_offset = 1; auth_start = 0; auth_stop = 0; auth_insert = 0; cipher_start = AES_BLOCK_LEN + 1; cipher_stop = 0; sec_pdu->pldlen = htobe32(16 + plen - (m_tls->m_epg_hdrlen + offset)); /* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */ sec_pdu->seqno_numivs = tlsp->scmd0_short.seqno_numivs; sec_pdu->ivgen_hdrlen = htobe32( tlsp->scmd0_short.ivgen_hdrlen | V_SCMD_HDR_LEN(offset == 0 ? m_tls->m_epg_hdrlen : 0)); txq->kern_tls_short++; } else { /* * AAD is TLS header. IV is after AAD. The cipher region * starts after the IV. See comments in ccr_authenc() and * ccr_gmac() in t4_crypto.c regarding cipher and auth * start/stop values. */ aad_start = 1; aad_stop = TLS_HEADER_LENGTH; iv_offset = TLS_HEADER_LENGTH + 1; cipher_start = m_tls->m_epg_hdrlen + 1; if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) { cipher_stop = 0; auth_start = cipher_start; auth_stop = 0; auth_insert = 0; } else { cipher_stop = 0; auth_start = cipher_start; auth_stop = 0; auth_insert = 0; } sec_pdu->pldlen = htobe32(plen); /* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */ sec_pdu->seqno_numivs = tlsp->scmd0.seqno_numivs; sec_pdu->ivgen_hdrlen = tlsp->scmd0.ivgen_hdrlen; if (mtod(m_tls, vm_offset_t) == 0) txq->kern_tls_full++; else txq->kern_tls_partial++; } sec_pdu->op_ivinsrtofst = htobe32( V_CPL_TX_SEC_PDU_OPCODE(CPL_TX_SEC_PDU) | V_CPL_TX_SEC_PDU_CPLLEN(2) | V_CPL_TX_SEC_PDU_PLACEHOLDER(0) | V_CPL_TX_SEC_PDU_IVINSRTOFST(iv_offset)); sec_pdu->aadstart_cipherstop_hi = htobe32( V_CPL_TX_SEC_PDU_AADSTART(aad_start) | V_CPL_TX_SEC_PDU_AADSTOP(aad_stop) | V_CPL_TX_SEC_PDU_CIPHERSTART(cipher_start) | V_CPL_TX_SEC_PDU_CIPHERSTOP_HI(cipher_stop >> 4)); sec_pdu->cipherstop_lo_authinsert = htobe32( V_CPL_TX_SEC_PDU_CIPHERSTOP_LO(cipher_stop & 0xf) | V_CPL_TX_SEC_PDU_AUTHSTART(auth_start) | V_CPL_TX_SEC_PDU_AUTHSTOP(auth_stop) | V_CPL_TX_SEC_PDU_AUTHINSERT(auth_insert)); sec_pdu->scmd1 = htobe64(m_tls->m_epg_seqno); /* Key context */ out = (void *)(sec_pdu + 1); if (tlsp->inline_key) { memcpy(out, &tlsp->keyctx, tlsp->tx_key_info_size); out += tlsp->tx_key_info_size; } else { /* ULPTX_SC_MEMRD to read key context. */ memrd = (void *)out; memrd->cmd_to_len = htobe32(V_ULPTX_CMD(ULP_TX_SC_MEMRD) | V_ULP_TX_SC_MORE(1) | V_ULPTX_LEN16(tlsp->tx_key_info_size >> 4)); memrd->addr = htobe32(tlsp->tx_key_addr >> 5); /* ULPTX_IDATA for CPL_TX_DATA and TLS header. */ idata = (void *)(memrd + 1); idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | V_ULP_TX_SC_MORE(1)); idata->len = htobe32(sizeof(struct cpl_tx_data) + imm_len); out = (void *)(idata + 1); } /* CPL_TX_DATA */ tx_data = (void *)out; OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tlsp->tid)); if (m->m_pkthdr.csum_flags & CSUM_TSO) { mss = m->m_pkthdr.tso_segsz; tlsp->prev_mss = mss; } else if (tlsp->prev_mss != 0) mss = tlsp->prev_mss; else mss = tlsp->vi->ifp->if_mtu - (m->m_pkthdr.l3hlen + m->m_pkthdr.l4hlen); if (offset == 0) { tx_data->len = htobe32(V_TX_DATA_MSS(mss) | V_TX_LENGTH(tlen)); tx_data->rsvd = htobe32(tcp_seqno); } else { tx_data->len = htobe32(V_TX_DATA_MSS(mss) | V_TX_LENGTH(tlen - (m_tls->m_epg_hdrlen + offset))); tx_data->rsvd = htobe32(tcp_seqno + m_tls->m_epg_hdrlen + offset); } tx_data->flags = htobe32(F_TX_BYPASS); if (last_wr && tcp->th_flags & TH_PUSH) tx_data->flags |= htobe32(F_TX_PUSH | F_TX_SHOVE); /* Populate the TLS header */ out = (void *)(tx_data + 1); if (offset == 0) { memcpy(out, m_tls->m_epg_hdr, m_tls->m_epg_hdrlen); out += m_tls->m_epg_hdrlen; } /* AES IV for a short record. */ if (plen == tlen) { iv = out; if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) { memcpy(iv, tlsp->keyctx.txhdr.txsalt, SALT_SIZE); memcpy(iv + 4, hdr + 1, 8); *(uint32_t *)(iv + 12) = htobe32(2 + offset / AES_BLOCK_LEN); } else memcpy(iv, hdr + 1, AES_BLOCK_LEN); out += AES_BLOCK_LEN; } if (imm_len % 16 != 0) { /* Zero pad to an 8-byte boundary. */ memset(out, 0, 8 - (imm_len % 8)); out += 8 - (imm_len % 8); /* * Insert a ULP_TX_SC_NOOP if needed so the SGL is * 16-byte aligned. */ if (imm_len % 16 <= 8) { idata = (void *)out; idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); idata->len = htobe32(0); out = (void *)(idata + 1); } } /* SGL for record payload */ sglist_reset(txq->gl); if (sglist_append_mbuf_epg(txq->gl, m_tls, m_tls->m_epg_hdrlen + offset, plen - (m_tls->m_epg_hdrlen + offset)) != 0) { #ifdef INVARIANTS panic("%s: failed to append sglist", __func__); #endif } write_gl_to_buf(txq->gl, out); if (using_scratch) { out = dst; copy_to_txd(eq, txq->ss, &out, wr_len); } ndesc += howmany(wr_len, EQ_ESIZE); MPASS(ndesc <= available); txq->kern_tls_records++; txq->kern_tls_octets += tlen - mtod(m_tls, vm_offset_t); if (mtod(m_tls, vm_offset_t) != 0) { if (offset == 0) txq->kern_tls_waste += mtod(m_tls, vm_offset_t); else txq->kern_tls_waste += mtod(m_tls, vm_offset_t) - (m_tls->m_epg_hdrlen + offset); } txsd = &txq->sdesc[pidx]; if (last_wr) txsd->m = m; else txsd->m = NULL; txsd->desc_used = howmany(wr_len, EQ_ESIZE); return (ndesc); } static int ktls_write_tcp_fin(struct sge_txq *txq, void *dst, struct mbuf *m, u_int available, tcp_seq tcp_seqno, u_int pidx) { struct tx_sdesc *txsd; struct fw_eth_tx_pkt_wr *wr; struct cpl_tx_pkt_core *cpl; uint32_t ctrl; uint64_t ctrl1; int len16, ndesc, pktlen; struct ether_header *eh; struct ip *ip, newip; struct ip6_hdr *ip6, newip6; struct tcphdr *tcp, newtcp; caddr_t out; TXQ_LOCK_ASSERT_OWNED(txq); M_ASSERTPKTHDR(m); wr = dst; pktlen = m->m_len; ctrl = sizeof(struct cpl_tx_pkt_core) + pktlen; len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + ctrl, 16); ndesc = tx_len16_to_desc(len16); MPASS(ndesc <= available); /* Firmware work request header */ wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); ctrl = V_FW_WR_LEN16(len16); wr->equiq_to_len16 = htobe32(ctrl); wr->r3 = 0; cpl = (void *)(wr + 1); /* CPL header */ cpl->ctrl0 = txq->cpl_ctrl0; cpl->pack = 0; cpl->len = htobe16(pktlen); out = (void *)(cpl + 1); /* Copy over Ethernet header. */ eh = mtod(m, struct ether_header *); copy_to_txd(&txq->eq, (caddr_t)eh, &out, m->m_pkthdr.l2hlen); /* Fixup length in IP header and copy out. */ if (ntohs(eh->ether_type) == ETHERTYPE_IP) { ip = (void *)((char *)eh + m->m_pkthdr.l2hlen); newip = *ip; newip.ip_len = htons(pktlen - m->m_pkthdr.l2hlen); copy_to_txd(&txq->eq, (caddr_t)&newip, &out, sizeof(newip)); if (m->m_pkthdr.l3hlen > sizeof(*ip)) copy_to_txd(&txq->eq, (caddr_t)(ip + 1), &out, m->m_pkthdr.l3hlen - sizeof(*ip)); ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP) | V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) | V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen); } else { ip6 = (void *)((char *)eh + m->m_pkthdr.l2hlen); newip6 = *ip6; newip6.ip6_plen = htons(pktlen - m->m_pkthdr.l2hlen); copy_to_txd(&txq->eq, (caddr_t)&newip6, &out, sizeof(newip6)); MPASS(m->m_pkthdr.l3hlen == sizeof(*ip6)); ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP6) | V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) | V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen); } cpl->ctrl1 = htobe64(ctrl1); txq->txcsum++; /* Set sequence number in TCP header. */ tcp = (void *)((char *)eh + m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen); newtcp = *tcp; newtcp.th_seq = htonl(tcp_seqno); copy_to_txd(&txq->eq, (caddr_t)&newtcp, &out, sizeof(newtcp)); /* Copy rest of packet. */ copy_to_txd(&txq->eq, (caddr_t)(tcp + 1), &out, m->m_len - (m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + sizeof(*tcp))); txq->imm_wrs++; txq->txpkt_wrs++; txq->kern_tls_fin++; txsd = &txq->sdesc[pidx]; txsd->m = m; txsd->desc_used = ndesc; return (ndesc); } int t6_ktls_write_wr(struct sge_txq *txq, void *dst, struct mbuf *m, u_int nsegs, u_int available) { struct sge_eq *eq = &txq->eq; struct tx_sdesc *txsd; struct tlspcb *tlsp; struct tcphdr *tcp; struct mbuf *m_tls; struct ether_header *eh; tcp_seq tcp_seqno; u_int ndesc, pidx, totdesc; uint16_t vlan_tag; bool has_fin, set_l2t_idx; void *tsopt; M_ASSERTPKTHDR(m); MPASS(m->m_pkthdr.snd_tag != NULL); tlsp = mst_to_tls(m->m_pkthdr.snd_tag); totdesc = 0; eh = mtod(m, struct ether_header *); tcp = (struct tcphdr *)((char *)eh + m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen); pidx = eq->pidx; has_fin = (tcp->th_flags & TH_FIN) != 0; /* * If this TLS record has a FIN, then we will send any * requested options as part of the FIN packet. */ if (!has_fin && ktls_has_tcp_options(tcp)) { ndesc = ktls_write_tcp_options(txq, dst, m, available, pidx); totdesc += ndesc; IDXINCR(pidx, ndesc, eq->sidx); dst = &eq->desc[pidx]; #ifdef VERBOSE_TRACES CTR2(KTR_CXGBE, "%s: tid %d wrote TCP options packet", __func__, tlsp->tid); #endif } /* * Allocate a new L2T entry if necessary. This may write out * a work request to the txq. */ if (m->m_flags & M_VLANTAG) vlan_tag = m->m_pkthdr.ether_vtag; else vlan_tag = 0xfff; set_l2t_idx = false; if (tlsp->l2te == NULL || tlsp->l2te->vlan != vlan_tag || memcmp(tlsp->l2te->dmac, eh->ether_dhost, ETHER_ADDR_LEN) != 0) { set_l2t_idx = true; if (tlsp->l2te) t4_l2t_release(tlsp->l2te); tlsp->l2te = t4_l2t_alloc_tls(tlsp->sc, txq, dst, &ndesc, vlan_tag, tlsp->vi->pi->lport, eh->ether_dhost); if (tlsp->l2te == NULL) CXGBE_UNIMPLEMENTED("failed to allocate TLS L2TE"); if (ndesc != 0) { MPASS(ndesc <= available - totdesc); txq->raw_wrs++; txsd = &txq->sdesc[pidx]; txsd->m = NULL; txsd->desc_used = ndesc; totdesc += ndesc; IDXINCR(pidx, ndesc, eq->sidx); dst = &eq->desc[pidx]; } } /* * Iterate over each TLS record constructing a work request * for that record. */ for (m_tls = m->m_next; m_tls != NULL; m_tls = m_tls->m_next) { MPASS(m_tls->m_flags & M_EXTPG); /* * Determine the initial TCP sequence number for this * record. */ tsopt = NULL; if (m_tls == m->m_next) { tcp_seqno = ntohl(tcp->th_seq) - mtod(m_tls, vm_offset_t); if (tlsp->using_timestamps) tsopt = ktls_find_tcp_timestamps(tcp); } else { MPASS(mtod(m_tls, vm_offset_t) == 0); tcp_seqno = tlsp->prev_seq; } ndesc = ktls_write_tls_wr(tlsp, txq, dst, m, tcp, m_tls, nsegs, available - totdesc, tcp_seqno, tsopt, pidx, set_l2t_idx); totdesc += ndesc; IDXINCR(pidx, ndesc, eq->sidx); dst = &eq->desc[pidx]; /* * The value of nsegs from the header mbuf's metadata * is only valid for the first TLS record. */ nsegs = 0; /* Only need to set the L2T index once. */ set_l2t_idx = false; } if (has_fin) { /* * If the TCP header for this chain has FIN sent, then * explicitly send a packet that has FIN set. This * will also have PUSH set if requested. This assumes * we sent at least one TLS record work request and * uses the TCP sequence number after that reqeust as * the sequence number for the FIN packet. */ ndesc = ktls_write_tcp_fin(txq, dst, m, available, tlsp->prev_seq, pidx); totdesc += ndesc; } MPASS(totdesc <= available); return (totdesc); } void cxgbe_tls_tag_free(struct m_snd_tag *mst) { struct adapter *sc; struct tlspcb *tlsp; tlsp = mst_to_tls(mst); sc = tlsp->sc; CTR2(KTR_CXGBE, "%s: tid %d", __func__, tlsp->tid); if (tlsp->l2te) t4_l2t_release(tlsp->l2te); if (tlsp->tid >= 0) release_tid(sc, tlsp->tid, tlsp->ctrlq); if (tlsp->ce) t4_release_lip(sc, tlsp->ce); if (tlsp->tx_key_addr >= 0) free_keyid(tlsp, tlsp->tx_key_addr); - explicit_bzero(&tlsp->keyctx, sizeof(&tlsp->keyctx)); - free(tlsp, M_CXGBE); + zfree(tlsp, M_CXGBE); } void t6_ktls_modload(void) { t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, ktls_act_open_rpl, CPL_COOKIE_KERN_TLS); } void t6_ktls_modunload(void) { t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, NULL, CPL_COOKIE_KERN_TLS); } #else int cxgbe_tls_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **pt) { return (ENXIO); } int t6_ktls_parse_pkt(struct mbuf *m, int *nsegsp, int *len16p) { return (EINVAL); } int t6_ktls_write_wr(struct sge_txq *txq, void *dst, struct mbuf *m, u_int nsegs, u_int available) { panic("can't happen"); } void cxgbe_tls_tag_free(struct m_snd_tag *mst) { panic("can't happen"); } void t6_ktls_modload(void) { } void t6_ktls_modunload(void) { } #endif Index: head/sys/dev/random/randomdev.c =================================================================== --- head/sys/dev/random/randomdev.c (revision 362623) +++ head/sys/dev/random/randomdev.c (revision 362624) @@ -1,434 +1,433 @@ /*- * Copyright (c) 2017 Oliver Pinter * Copyright (c) 2000-2015 Mark R V Murray * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define RANDOM_UNIT 0 /* * In loadable random, the core randomdev.c / random(9) routines have static * visibility and an alternative name to avoid conflicting with the function * pointers of the real names in the core kernel. random_alg_context_init * installs pointers to the loadable static names into the core kernel's * function pointers at SI_SUB_RANDOM:SI_ORDER_SECOND. */ #if defined(RANDOM_LOADABLE) static int (read_random_uio)(struct uio *, bool); static void (read_random)(void *, u_int); static bool (is_random_seeded)(void); #endif static d_read_t randomdev_read; static d_write_t randomdev_write; static d_poll_t randomdev_poll; static d_ioctl_t randomdev_ioctl; static struct cdevsw random_cdevsw = { .d_name = "random", .d_version = D_VERSION, .d_read = randomdev_read, .d_write = randomdev_write, .d_poll = randomdev_poll, .d_ioctl = randomdev_ioctl, }; /* For use with make_dev(9)/destroy_dev(9). */ static struct cdev *random_dev; #if defined(RANDOM_LOADABLE) static void random_alg_context_init(void *dummy __unused) { _read_random_uio = (read_random_uio); _read_random = (read_random); _is_random_seeded = (is_random_seeded); } SYSINIT(random_device, SI_SUB_RANDOM, SI_ORDER_SECOND, random_alg_context_init, NULL); #endif static struct selinfo rsel; /* * This is the read uio(9) interface for random(4). */ /* ARGSUSED */ static int randomdev_read(struct cdev *dev __unused, struct uio *uio, int flags) { return ((read_random_uio)(uio, (flags & O_NONBLOCK) != 0)); } /* * If the random device is not seeded, blocks until it is seeded. * * Returns zero when the random device is seeded. * * If the 'interruptible' parameter is true, and the device is unseeded, this * routine may be interrupted. If interrupted, it will return either ERESTART * or EINTR. */ #define SEEDWAIT_INTERRUPTIBLE true #define SEEDWAIT_UNINTERRUPTIBLE false static int randomdev_wait_until_seeded(bool interruptible) { int error, spamcount, slpflags; slpflags = interruptible ? PCATCH : 0; error = 0; spamcount = 0; while (!p_random_alg_context->ra_seeded()) { /* keep tapping away at the pre-read until we seed/unblock. */ p_random_alg_context->ra_pre_read(); /* Only bother the console every 10 seconds or so */ if (spamcount == 0) printf("random: %s unblock wait\n", __func__); spamcount = (spamcount + 1) % 100; error = tsleep(p_random_alg_context, slpflags, "randseed", hz / 10); if (error == ERESTART || error == EINTR) { KASSERT(interruptible, ("unexpected wake of non-interruptible sleep")); break; } /* Squash tsleep timeout condition */ if (error == EWOULDBLOCK) error = 0; KASSERT(error == 0, ("unexpected tsleep error %d", error)); } return (error); } int (read_random_uio)(struct uio *uio, bool nonblock) { /* 16 MiB takes about 0.08 s CPU time on my 2017 AMD Zen CPU */ #define SIGCHK_PERIOD (16 * 1024 * 1024) const size_t sigchk_period = SIGCHK_PERIOD; CTASSERT(SIGCHK_PERIOD % PAGE_SIZE == 0); #undef SIGCHK_PERIOD uint8_t *random_buf; size_t total_read, read_len; ssize_t bufsize; int error; KASSERT(uio->uio_rw == UIO_READ, ("%s: bogus write", __func__)); KASSERT(uio->uio_resid >= 0, ("%s: bogus negative resid", __func__)); p_random_alg_context->ra_pre_read(); error = 0; /* (Un)Blocking logic */ if (!p_random_alg_context->ra_seeded()) { if (nonblock) error = EWOULDBLOCK; else error = randomdev_wait_until_seeded( SEEDWAIT_INTERRUPTIBLE); } if (error != 0) return (error); read_rate_increment(howmany(uio->uio_resid + 1, sizeof(uint32_t))); total_read = 0; /* Easy to deal with the trivial 0 byte case. */ if (__predict_false(uio->uio_resid == 0)) return (0); /* * If memory is plentiful, use maximally sized requests to avoid * per-call algorithm overhead. But fall back to a single page * allocation if the full request isn't immediately available. */ bufsize = MIN(sigchk_period, (size_t)uio->uio_resid); random_buf = malloc(bufsize, M_ENTROPY, M_NOWAIT); if (random_buf == NULL) { bufsize = PAGE_SIZE; random_buf = malloc(bufsize, M_ENTROPY, M_WAITOK); } error = 0; while (uio->uio_resid > 0 && error == 0) { read_len = MIN((size_t)uio->uio_resid, bufsize); p_random_alg_context->ra_read(random_buf, read_len); /* * uiomove() may yield the CPU before each 'read_len' bytes (up * to bufsize) are copied out. */ error = uiomove(random_buf, read_len, uio); total_read += read_len; /* * Poll for signals every few MBs to avoid very long * uninterruptible syscalls. */ if (error == 0 && uio->uio_resid != 0 && total_read % sigchk_period == 0) { error = tsleep_sbt(p_random_alg_context, PCATCH, "randrd", SBT_1NS, 0, C_HARDCLOCK); /* Squash tsleep timeout condition */ if (error == EWOULDBLOCK) error = 0; } } /* * Short reads due to signal interrupt should not indicate error. * Instead, the uio will reflect that the read was shorter than * requested. */ if (error == ERESTART || error == EINTR) error = 0; - explicit_bzero(random_buf, bufsize); - free(random_buf, M_ENTROPY); + zfree(random_buf, M_ENTROPY); return (error); } /*- * Kernel API version of read_random(). This is similar to read_random_uio(), * except it doesn't interface with uio(9). It cannot assumed that random_buf * is a multiple of RANDOM_BLOCKSIZE bytes. * * If the tunable 'kern.random.initial_seeding.bypass_before_seeding' is set * non-zero, silently fail to emit random data (matching the pre-r346250 * behavior). If read_random is called prior to seeding and bypassed because * of this tunable, the condition is reported in the read-only sysctl * 'kern.random.initial_seeding.read_random_bypassed_before_seeding'. */ void (read_random)(void *random_buf, u_int len) { KASSERT(random_buf != NULL, ("No suitable random buffer in %s", __func__)); p_random_alg_context->ra_pre_read(); if (len == 0) return; /* (Un)Blocking logic */ if (__predict_false(!p_random_alg_context->ra_seeded())) { if (random_bypass_before_seeding) { if (!read_random_bypassed_before_seeding) { if (!random_bypass_disable_warnings) printf("read_random: WARNING: bypassing" " request for random data because " "the random device is not yet " "seeded and the knob " "'bypass_before_seeding' was " "enabled.\n"); read_random_bypassed_before_seeding = true; } /* Avoid potentially leaking stack garbage */ memset(random_buf, 0, len); return; } (void)randomdev_wait_until_seeded(SEEDWAIT_UNINTERRUPTIBLE); } read_rate_increment(roundup2(len, sizeof(uint32_t))); p_random_alg_context->ra_read(random_buf, len); } bool (is_random_seeded)(void) { return (p_random_alg_context->ra_seeded()); } static __inline void randomdev_accumulate(uint8_t *buf, u_int count) { static u_int destination = 0; static struct harvest_event event; static struct randomdev_hash hash; static uint32_t entropy_data[RANDOM_KEYSIZE_WORDS]; uint32_t timestamp; int i; /* Extra timing here is helpful to scrape scheduler jitter entropy */ randomdev_hash_init(&hash); timestamp = (uint32_t)get_cyclecount(); randomdev_hash_iterate(&hash, ×tamp, sizeof(timestamp)); randomdev_hash_iterate(&hash, buf, count); timestamp = (uint32_t)get_cyclecount(); randomdev_hash_iterate(&hash, ×tamp, sizeof(timestamp)); randomdev_hash_finish(&hash, entropy_data); for (i = 0; i < RANDOM_KEYSIZE_WORDS; i += sizeof(event.he_entropy)/sizeof(event.he_entropy[0])) { event.he_somecounter = (uint32_t)get_cyclecount(); event.he_size = sizeof(event.he_entropy); event.he_source = RANDOM_CACHED; event.he_destination = destination++; /* Harmless cheating */ memcpy(event.he_entropy, entropy_data + i, sizeof(event.he_entropy)); p_random_alg_context->ra_event_processor(&event); } explicit_bzero(&event, sizeof(event)); explicit_bzero(entropy_data, sizeof(entropy_data)); } /* ARGSUSED */ static int randomdev_write(struct cdev *dev __unused, struct uio *uio, int flags __unused) { uint8_t *random_buf; int c, error = 0; ssize_t nbytes; random_buf = malloc(PAGE_SIZE, M_ENTROPY, M_WAITOK); nbytes = uio->uio_resid; while (uio->uio_resid > 0 && error == 0) { c = MIN(uio->uio_resid, PAGE_SIZE); error = uiomove(random_buf, c, uio); if (error) break; randomdev_accumulate(random_buf, c); tsleep(p_random_alg_context, 0, "randwr", hz/10); } if (nbytes != uio->uio_resid && (error == ERESTART || error == EINTR)) /* Partial write, not error. */ error = 0; free(random_buf, M_ENTROPY); return (error); } /* ARGSUSED */ static int randomdev_poll(struct cdev *dev __unused, int events, struct thread *td __unused) { if (events & (POLLIN | POLLRDNORM)) { if (p_random_alg_context->ra_seeded()) events &= (POLLIN | POLLRDNORM); else selrecord(td, &rsel); } return (events); } /* This will be called by the entropy processor when it seeds itself and becomes secure */ void randomdev_unblock(void) { selwakeuppri(&rsel, PUSER); wakeup(p_random_alg_context); printf("random: unblocking device.\n"); /* Do random(9) a favour while we are about it. */ (void)atomic_cmpset_int(&arc4rand_iniseed_state, ARC4_ENTR_NONE, ARC4_ENTR_HAVE); } /* ARGSUSED */ static int randomdev_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr __unused, int flags __unused, struct thread *td __unused) { int error = 0; switch (cmd) { /* Really handled in upper layer */ case FIOASYNC: case FIONBIO: break; default: error = ENOTTY; } return (error); } /* ARGSUSED */ static int randomdev_modevent(module_t mod __unused, int type, void *data __unused) { int error = 0; switch (type) { case MOD_LOAD: printf("random: entropy device external interface\n"); random_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD, &random_cdevsw, RANDOM_UNIT, NULL, UID_ROOT, GID_WHEEL, 0644, "random"); make_dev_alias(random_dev, "urandom"); /* compatibility */ break; case MOD_UNLOAD: error = EBUSY; break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } static moduledata_t randomdev_mod = { "random_device", randomdev_modevent, 0 }; DECLARE_MODULE(random_device, randomdev_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST); MODULE_VERSION(random_device, 1); MODULE_DEPEND(random_device, crypto, 1, 1, 1); MODULE_DEPEND(random_device, random_harvestq, 1, 1, 1); Index: head/sys/geom/eli/g_eli.c =================================================================== --- head/sys/geom/eli/g_eli.c (revision 362623) +++ head/sys/geom/eli/g_eli.c (revision 362624) @@ -1,1457 +1,1454 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2019 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_eli, "GEOM crypto module"); MALLOC_DEFINE(M_ELI, "eli data", "GEOM_ELI Data"); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, eli, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "GEOM_ELI stuff"); static int g_eli_version = G_ELI_VERSION; SYSCTL_INT(_kern_geom_eli, OID_AUTO, version, CTLFLAG_RD, &g_eli_version, 0, "GELI version"); int g_eli_debug = 0; SYSCTL_INT(_kern_geom_eli, OID_AUTO, debug, CTLFLAG_RWTUN, &g_eli_debug, 0, "Debug level"); static u_int g_eli_tries = 3; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, tries, CTLFLAG_RWTUN, &g_eli_tries, 0, "Number of tries for entering the passphrase"); static u_int g_eli_visible_passphrase = GETS_NOECHO; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, visible_passphrase, CTLFLAG_RWTUN, &g_eli_visible_passphrase, 0, "Visibility of passphrase prompt (0 = invisible, 1 = visible, 2 = asterisk)"); u_int g_eli_overwrites = G_ELI_OVERWRITES; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, overwrites, CTLFLAG_RWTUN, &g_eli_overwrites, 0, "Number of times on-disk keys should be overwritten when destroying them"); static u_int g_eli_threads = 0; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, threads, CTLFLAG_RWTUN, &g_eli_threads, 0, "Number of threads doing crypto work"); u_int g_eli_batch = 0; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, batch, CTLFLAG_RWTUN, &g_eli_batch, 0, "Use crypto operations batching"); /* * Passphrase cached during boot, in order to be more user-friendly if * there are multiple providers using the same passphrase. */ static char cached_passphrase[256]; static u_int g_eli_boot_passcache = 1; TUNABLE_INT("kern.geom.eli.boot_passcache", &g_eli_boot_passcache); SYSCTL_UINT(_kern_geom_eli, OID_AUTO, boot_passcache, CTLFLAG_RD, &g_eli_boot_passcache, 0, "Passphrases are cached during boot process for possible reuse"); static void fetch_loader_passphrase(void * dummy) { char * env_passphrase; KASSERT(dynamic_kenv, ("need dynamic kenv")); if ((env_passphrase = kern_getenv("kern.geom.eli.passphrase")) != NULL) { /* Extract passphrase from the environment. */ strlcpy(cached_passphrase, env_passphrase, sizeof(cached_passphrase)); freeenv(env_passphrase); /* Wipe the passphrase from the environment. */ kern_unsetenv("kern.geom.eli.passphrase"); } } SYSINIT(geli_fetch_loader_passphrase, SI_SUB_KMEM + 1, SI_ORDER_ANY, fetch_loader_passphrase, NULL); static void zero_boot_passcache(void) { explicit_bzero(cached_passphrase, sizeof(cached_passphrase)); } static void zero_geli_intake_keys(void) { struct keybuf *keybuf; int i; if ((keybuf = get_keybuf()) != NULL) { /* Scan the key buffer, clear all GELI keys. */ for (i = 0; i < keybuf->kb_nents; i++) { if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) { explicit_bzero(keybuf->kb_ents[i].ke_data, sizeof(keybuf->kb_ents[i].ke_data)); keybuf->kb_ents[i].ke_type = KEYBUF_TYPE_NONE; } } } } static void zero_intake_passcache(void *dummy) { zero_boot_passcache(); zero_geli_intake_keys(); } EVENTHANDLER_DEFINE(mountroot, zero_intake_passcache, NULL, 0); static eventhandler_tag g_eli_pre_sync = NULL; static int g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp, off_t offset, struct g_eli_metadata *md); static int g_eli_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static void g_eli_init(struct g_class *mp); static void g_eli_fini(struct g_class *mp); static g_taste_t g_eli_taste; static g_dumpconf_t g_eli_dumpconf; struct g_class g_eli_class = { .name = G_ELI_CLASS_NAME, .version = G_VERSION, .ctlreq = g_eli_config, .taste = g_eli_taste, .destroy_geom = g_eli_destroy_geom, .init = g_eli_init, .fini = g_eli_fini }; /* * Code paths: * BIO_READ: * g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver * BIO_WRITE: * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ /* * EAGAIN from crypto(9) means, that we were probably balanced to another crypto * accelerator or something like this. * The function updates the SID and rerun the operation. */ int g_eli_crypto_rerun(struct cryptop *crp) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct bio *bp; int error; bp = (struct bio *)crp->crp_opaque; sc = bp->bio_to->geom->softc; LIST_FOREACH(wr, &sc->sc_workers, w_next) { if (wr->w_number == bp->bio_pflags) break; } KASSERT(wr != NULL, ("Invalid worker (%u).", bp->bio_pflags)); G_ELI_DEBUG(1, "Rerunning crypto %s request (sid: %p -> %p).", bp->bio_cmd == BIO_READ ? "READ" : "WRITE", wr->w_sid, crp->crp_session); wr->w_sid = crp->crp_session; crp->crp_etype = 0; error = crypto_dispatch(crp); if (error == 0) return (0); G_ELI_DEBUG(1, "%s: crypto_dispatch() returned %d.", __func__, error); crp->crp_etype = error; return (error); } static void g_eli_getattr_done(struct bio *bp) { if (bp->bio_error == 0 && !strcmp(bp->bio_attribute, "GEOM::physpath")) { strlcat(bp->bio_data, "/eli", bp->bio_length); } g_std_done(bp); } /* * The function is called afer reading encrypted data from the provider. * * g_eli_start -> g_eli_crypto_read -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver */ void g_eli_read_done(struct bio *bp) { struct g_eli_softc *sc; struct bio *pbp; G_ELI_LOGREQ(2, bp, "Request done."); pbp = bp->bio_parent; if (pbp->bio_error == 0 && bp->bio_error != 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); /* * Do we have all sectors already? */ pbp->bio_inbed++; if (pbp->bio_inbed < pbp->bio_children) return; sc = pbp->bio_to->geom->softc; if (pbp->bio_error != 0) { G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__, pbp->bio_error); pbp->bio_completed = 0; if (pbp->bio_driver2 != NULL) { free(pbp->bio_driver2, M_ELI); pbp->bio_driver2 = NULL; } g_io_deliver(pbp, pbp->bio_error); if (sc != NULL) atomic_subtract_int(&sc->sc_inflight, 1); return; } mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, pbp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); } /* * The function is called after we encrypt and write data. * * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> G_ELI_WRITE_DONE -> g_io_deliver */ void g_eli_write_done(struct bio *bp) { struct g_eli_softc *sc; struct bio *pbp; G_ELI_LOGREQ(2, bp, "Request done."); pbp = bp->bio_parent; if (pbp->bio_error == 0 && bp->bio_error != 0) pbp->bio_error = bp->bio_error; g_destroy_bio(bp); /* * Do we have all sectors already? */ pbp->bio_inbed++; if (pbp->bio_inbed < pbp->bio_children) return; free(pbp->bio_driver2, M_ELI); pbp->bio_driver2 = NULL; if (pbp->bio_error != 0) { G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__, pbp->bio_error); pbp->bio_completed = 0; } else pbp->bio_completed = pbp->bio_length; /* * Write is finished, send it up. */ sc = pbp->bio_to->geom->softc; g_io_deliver(pbp, pbp->bio_error); if (sc != NULL) atomic_subtract_int(&sc->sc_inflight, 1); } /* * This function should never be called, but GEOM made as it set ->orphan() * method for every geom. */ static void g_eli_orphan_spoil_assert(struct g_consumer *cp) { panic("Function %s() called for %s.", __func__, cp->geom->name); } static void g_eli_orphan(struct g_consumer *cp) { struct g_eli_softc *sc; g_topology_assert(); sc = cp->geom->softc; if (sc == NULL) return; g_eli_destroy(sc, TRUE); } static void g_eli_resize(struct g_consumer *cp) { struct g_eli_softc *sc; struct g_provider *epp, *pp; off_t oldsize; g_topology_assert(); sc = cp->geom->softc; if (sc == NULL) return; if ((sc->sc_flags & G_ELI_FLAG_AUTORESIZE) == 0) { G_ELI_DEBUG(0, "Autoresize is turned off, old size: %jd.", (intmax_t)sc->sc_provsize); return; } pp = cp->provider; if ((sc->sc_flags & G_ELI_FLAG_ONETIME) == 0) { struct g_eli_metadata md; u_char *sector; int error; sector = NULL; error = g_eli_read_metadata_offset(cp->geom->class, pp, sc->sc_provsize - pp->sectorsize, &md); if (error != 0) { G_ELI_DEBUG(0, "Cannot read metadata from %s (error=%d).", pp->name, error); goto iofail; } md.md_provsize = pp->mediasize; sector = malloc(pp->sectorsize, M_ELI, M_WAITOK | M_ZERO); eli_metadata_encode(&md, sector); error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector, pp->sectorsize); if (error != 0) { G_ELI_DEBUG(0, "Cannot store metadata on %s (error=%d).", pp->name, error); goto iofail; } explicit_bzero(sector, pp->sectorsize); error = g_write_data(cp, sc->sc_provsize - pp->sectorsize, sector, pp->sectorsize); if (error != 0) { G_ELI_DEBUG(0, "Cannot clear old metadata from %s (error=%d).", pp->name, error); goto iofail; } iofail: explicit_bzero(&md, sizeof(md)); - if (sector != NULL) { - explicit_bzero(sector, pp->sectorsize); - free(sector, M_ELI); - } + zfree(sector, M_ELI); } oldsize = sc->sc_mediasize; sc->sc_mediasize = eli_mediasize(sc, pp->mediasize, pp->sectorsize); g_eli_key_resize(sc); sc->sc_provsize = pp->mediasize; epp = LIST_FIRST(&sc->sc_geom->provider); g_resize_provider(epp, sc->sc_mediasize); G_ELI_DEBUG(0, "Device %s size changed from %jd to %jd.", epp->name, (intmax_t)oldsize, (intmax_t)sc->sc_mediasize); } /* * BIO_READ: * G_ELI_START -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver * BIO_WRITE: * G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver */ static void g_eli_start(struct bio *bp) { struct g_eli_softc *sc; struct g_consumer *cp; struct bio *cbp; sc = bp->bio_to->geom->softc; KASSERT(sc != NULL, ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_ELI_LOGREQ(2, bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_GETATTR: case BIO_FLUSH: case BIO_ZONE: case BIO_SPEEDUP: break; case BIO_DELETE: /* * If the user hasn't set the NODELETE flag, we just pass * it down the stack and let the layers beneath us do (or * not) whatever they do with it. If they have, we * reject it. A possible extension would be an * additional flag to take it as a hint to shred the data * with [multiple?] overwrites. */ if (!(sc->sc_flags & G_ELI_FLAG_NODELETE)) break; default: g_io_deliver(bp, EOPNOTSUPP); return; } cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); return; } bp->bio_driver1 = cbp; bp->bio_pflags = G_ELI_NEW_BIO; switch (bp->bio_cmd) { case BIO_READ: if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) { g_eli_crypto_read(sc, bp, 0); break; } /* FALLTHROUGH */ case BIO_WRITE: mtx_lock(&sc->sc_queue_mtx); bioq_insert_tail(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); break; case BIO_GETATTR: case BIO_FLUSH: case BIO_DELETE: case BIO_SPEEDUP: case BIO_ZONE: if (bp->bio_cmd == BIO_GETATTR) cbp->bio_done = g_eli_getattr_done; else cbp->bio_done = g_std_done; cp = LIST_FIRST(&sc->sc_geom->consumer); cbp->bio_to = cp->provider; G_ELI_LOGREQ(2, cbp, "Sending request."); g_io_request(cbp, cp); break; } } static int g_eli_newsession(struct g_eli_worker *wr) { struct g_eli_softc *sc; struct crypto_session_params csp; uint32_t caps; int error, new_crypto; void *key; sc = wr->w_softc; memset(&csp, 0, sizeof(csp)); csp.csp_mode = CSP_MODE_CIPHER; csp.csp_cipher_alg = sc->sc_ealgo; csp.csp_ivlen = g_eli_ivlen(sc->sc_ealgo); csp.csp_cipher_klen = sc->sc_ekeylen / 8; if (sc->sc_ealgo == CRYPTO_AES_XTS) csp.csp_cipher_klen <<= 1; if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) { key = g_eli_key_hold(sc, 0, LIST_FIRST(&sc->sc_geom->consumer)->provider->sectorsize); csp.csp_cipher_key = key; } else { key = NULL; csp.csp_cipher_key = sc->sc_ekey; } if (sc->sc_flags & G_ELI_FLAG_AUTH) { csp.csp_mode = CSP_MODE_ETA; csp.csp_auth_alg = sc->sc_aalgo; csp.csp_auth_klen = G_ELI_AUTH_SECKEYLEN; } switch (sc->sc_crypto) { case G_ELI_CRYPTO_SW_ACCEL: case G_ELI_CRYPTO_SW: error = crypto_newsession(&wr->w_sid, &csp, CRYPTOCAP_F_SOFTWARE); break; case G_ELI_CRYPTO_HW: error = crypto_newsession(&wr->w_sid, &csp, CRYPTOCAP_F_HARDWARE); break; case G_ELI_CRYPTO_UNKNOWN: error = crypto_newsession(&wr->w_sid, &csp, CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE); if (error == 0) { caps = crypto_ses2caps(wr->w_sid); if (caps & CRYPTOCAP_F_HARDWARE) new_crypto = G_ELI_CRYPTO_HW; else if (caps & CRYPTOCAP_F_ACCEL_SOFTWARE) new_crypto = G_ELI_CRYPTO_SW_ACCEL; else new_crypto = G_ELI_CRYPTO_SW; mtx_lock(&sc->sc_queue_mtx); if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN) sc->sc_crypto = new_crypto; mtx_unlock(&sc->sc_queue_mtx); } break; default: panic("%s: invalid condition", __func__); } if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) { if (error) g_eli_key_drop(sc, key); else wr->w_first_key = key; } return (error); } static void g_eli_freesession(struct g_eli_worker *wr) { struct g_eli_softc *sc; crypto_freesession(wr->w_sid); if (wr->w_first_key != NULL) { sc = wr->w_softc; g_eli_key_drop(sc, wr->w_first_key); wr->w_first_key = NULL; } } static void g_eli_cancel(struct g_eli_softc *sc) { struct bio *bp; mtx_assert(&sc->sc_queue_mtx, MA_OWNED); while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) { KASSERT(bp->bio_pflags == G_ELI_NEW_BIO, ("Not new bio when canceling (bp=%p).", bp)); g_io_deliver(bp, ENXIO); } } static struct bio * g_eli_takefirst(struct g_eli_softc *sc) { struct bio *bp; mtx_assert(&sc->sc_queue_mtx, MA_OWNED); if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND)) return (bioq_takefirst(&sc->sc_queue)); /* * Device suspended, so we skip new I/O requests. */ TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { if (bp->bio_pflags != G_ELI_NEW_BIO) break; } if (bp != NULL) bioq_remove(&sc->sc_queue, bp); return (bp); } /* * This is the main function for kernel worker thread when we don't have * hardware acceleration and we have to do cryptography in software. * Dedicated thread is needed, so we don't slow down g_up/g_down GEOM * threads with crypto work. */ static void g_eli_worker(void *arg) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct bio *bp; int error; wr = arg; sc = wr->w_softc; #ifdef EARLY_AP_STARTUP MPASS(!sc->sc_cpubind || smp_started); #elif defined(SMP) /* Before sched_bind() to a CPU, wait for all CPUs to go on-line. */ if (sc->sc_cpubind) { while (!smp_started) tsleep(wr, 0, "geli:smp", hz / 4); } #endif thread_lock(curthread); sched_prio(curthread, PUSER); if (sc->sc_cpubind) sched_bind(curthread, wr->w_number % mp_ncpus); thread_unlock(curthread); G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm); for (;;) { mtx_lock(&sc->sc_queue_mtx); again: bp = g_eli_takefirst(sc); if (bp == NULL) { if (sc->sc_flags & G_ELI_FLAG_DESTROY) { g_eli_cancel(sc); LIST_REMOVE(wr, w_next); g_eli_freesession(wr); free(wr, M_ELI); G_ELI_DEBUG(1, "Thread %s exiting.", curthread->td_proc->p_comm); wakeup(&sc->sc_workers); mtx_unlock(&sc->sc_queue_mtx); kproc_exit(0); } while (sc->sc_flags & G_ELI_FLAG_SUSPEND) { if (sc->sc_inflight > 0) { G_ELI_DEBUG(0, "inflight=%d", sc->sc_inflight); /* * We still have inflight BIOs, so * sleep and retry. */ msleep(sc, &sc->sc_queue_mtx, PRIBIO, "geli:inf", hz / 5); goto again; } /* * Suspend requested, mark the worker as * suspended and go to sleep. */ if (wr->w_active) { g_eli_freesession(wr); wr->w_active = FALSE; } wakeup(&sc->sc_workers); msleep(sc, &sc->sc_queue_mtx, PRIBIO, "geli:suspend", 0); if (!wr->w_active && !(sc->sc_flags & G_ELI_FLAG_SUSPEND)) { error = g_eli_newsession(wr); KASSERT(error == 0, ("g_eli_newsession() failed on resume (error=%d)", error)); wr->w_active = TRUE; } goto again; } msleep(sc, &sc->sc_queue_mtx, PDROP, "geli:w", 0); continue; } if (bp->bio_pflags == G_ELI_NEW_BIO) atomic_add_int(&sc->sc_inflight, 1); mtx_unlock(&sc->sc_queue_mtx); if (bp->bio_pflags == G_ELI_NEW_BIO) { bp->bio_pflags = 0; if (sc->sc_flags & G_ELI_FLAG_AUTH) { if (bp->bio_cmd == BIO_READ) g_eli_auth_read(sc, bp); else g_eli_auth_run(wr, bp); } else { if (bp->bio_cmd == BIO_READ) g_eli_crypto_read(sc, bp, 1); else g_eli_crypto_run(wr, bp); } } else { if (sc->sc_flags & G_ELI_FLAG_AUTH) g_eli_auth_run(wr, bp); else g_eli_crypto_run(wr, bp); } } } static int g_eli_read_metadata_offset(struct g_class *mp, struct g_provider *pp, off_t offset, struct g_eli_metadata *md) { struct g_geom *gp; struct g_consumer *cp; u_char *buf = NULL; int error; g_topology_assert(); gp = g_new_geomf(mp, "eli:taste"); gp->start = g_eli_start; gp->access = g_std_access; /* * g_eli_read_metadata() is always called from the event thread. * Our geom is created and destroyed in the same event, so there * could be no orphan nor spoil event in the meantime. */ gp->orphan = g_eli_orphan_spoil_assert; gp->spoiled = g_eli_orphan_spoil_assert; cp = g_new_consumer(gp); error = g_attach(cp, pp); if (error != 0) goto end; error = g_access(cp, 1, 0, 0); if (error != 0) goto end; g_topology_unlock(); buf = g_read_data(cp, offset, pp->sectorsize, &error); g_topology_lock(); if (buf == NULL) goto end; error = eli_metadata_decode(buf, md); if (error != 0) goto end; /* Metadata was read and decoded successfully. */ end: if (buf != NULL) g_free(buf); if (cp->provider != NULL) { if (cp->acr == 1) g_access(cp, -1, 0, 0); g_detach(cp); } g_destroy_consumer(cp); g_destroy_geom(gp); return (error); } int g_eli_read_metadata(struct g_class *mp, struct g_provider *pp, struct g_eli_metadata *md) { return (g_eli_read_metadata_offset(mp, pp, pp->mediasize - pp->sectorsize, md)); } /* * The function is called when we had last close on provider and user requested * to close it when this situation occur. */ static void g_eli_last_close(void *arg, int flags __unused) { struct g_geom *gp; char gpname[64]; int error; g_topology_assert(); gp = arg; strlcpy(gpname, gp->name, sizeof(gpname)); error = g_eli_destroy(gp->softc, TRUE); KASSERT(error == 0, ("Cannot detach %s on last close (error=%d).", gpname, error)); G_ELI_DEBUG(0, "Detached %s on last close.", gpname); } int g_eli_access(struct g_provider *pp, int dr, int dw, int de) { struct g_eli_softc *sc; struct g_geom *gp; gp = pp->geom; sc = gp->softc; if (dw > 0) { if (sc->sc_flags & G_ELI_FLAG_RO) { /* Deny write attempts. */ return (EROFS); } /* Someone is opening us for write, we need to remember that. */ sc->sc_flags |= G_ELI_FLAG_WOPEN; return (0); } /* Is this the last close? */ if (pp->acr + dr > 0 || pp->acw + dw > 0 || pp->ace + de > 0) return (0); /* * Automatically detach on last close if requested. */ if ((sc->sc_flags & G_ELI_FLAG_RW_DETACH) || (sc->sc_flags & G_ELI_FLAG_WOPEN)) { g_post_event(g_eli_last_close, gp, M_WAITOK, NULL); } return (0); } static int g_eli_cpu_is_disabled(int cpu) { #ifdef SMP return (CPU_ISSET(cpu, &hlt_cpus_mask)); #else return (0); #endif } struct g_geom * g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp, const struct g_eli_metadata *md, const u_char *mkey, int nkey) { struct g_eli_softc *sc; struct g_eli_worker *wr; struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; struct g_geom_alias *gap; u_int i, threads; int dcw, error; G_ELI_DEBUG(1, "Creating device %s%s.", bpp->name, G_ELI_SUFFIX); KASSERT(eli_metadata_crypto_supported(md), ("%s: unsupported crypto for %s", __func__, bpp->name)); gp = g_new_geomf(mp, "%s%s", bpp->name, G_ELI_SUFFIX); sc = malloc(sizeof(*sc), M_ELI, M_WAITOK | M_ZERO); gp->start = g_eli_start; /* * Spoiling can happen even though we have the provider open * exclusively, e.g. through media change events. */ gp->spoiled = g_eli_orphan; gp->orphan = g_eli_orphan; gp->resize = g_eli_resize; gp->dumpconf = g_eli_dumpconf; /* * If detach-on-last-close feature is not enabled and we don't operate * on read-only provider, we can simply use g_std_access(). */ if (md->md_flags & (G_ELI_FLAG_WO_DETACH | G_ELI_FLAG_RO)) gp->access = g_eli_access; else gp->access = g_std_access; eli_metadata_softc(sc, md, bpp->sectorsize, bpp->mediasize); sc->sc_nkey = nkey; gp->softc = sc; sc->sc_geom = gp; bioq_init(&sc->sc_queue); mtx_init(&sc->sc_queue_mtx, "geli:queue", NULL, MTX_DEF); mtx_init(&sc->sc_ekeys_lock, "geli:ekeys", NULL, MTX_DEF); pp = NULL; cp = g_new_consumer(gp); error = g_attach(cp, bpp); if (error != 0) { if (req != NULL) { gctl_error(req, "Cannot attach to %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot attach to %s (error=%d).", bpp->name, error); } goto failed; } /* * Keep provider open all the time, so we can run critical tasks, * like Master Keys deletion, without wondering if we can open * provider or not. * We don't open provider for writing only when user requested read-only * access. */ dcw = (sc->sc_flags & G_ELI_FLAG_RO) ? 0 : 1; error = g_access(cp, 1, dcw, 1); if (error != 0) { if (req != NULL) { gctl_error(req, "Cannot access %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot access %s (error=%d).", bpp->name, error); } goto failed; } /* * Remember the keys in our softc structure. */ g_eli_mkey_propagate(sc, mkey); LIST_INIT(&sc->sc_workers); threads = g_eli_threads; if (threads == 0) threads = mp_ncpus; sc->sc_cpubind = (mp_ncpus > 1 && threads == mp_ncpus); for (i = 0; i < threads; i++) { if (g_eli_cpu_is_disabled(i)) { G_ELI_DEBUG(1, "%s: CPU %u disabled, skipping.", bpp->name, i); continue; } wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO); wr->w_softc = sc; wr->w_number = i; wr->w_active = TRUE; error = g_eli_newsession(wr); if (error != 0) { free(wr, M_ELI); if (req != NULL) { gctl_error(req, "Cannot set up crypto session " "for %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot set up crypto session " "for %s (error=%d).", bpp->name, error); } goto failed; } error = kproc_create(g_eli_worker, wr, &wr->w_proc, 0, 0, "g_eli[%u] %s", i, bpp->name); if (error != 0) { g_eli_freesession(wr); free(wr, M_ELI); if (req != NULL) { gctl_error(req, "Cannot create kernel thread " "for %s (error=%d).", bpp->name, error); } else { G_ELI_DEBUG(1, "Cannot create kernel thread " "for %s (error=%d).", bpp->name, error); } goto failed; } LIST_INSERT_HEAD(&sc->sc_workers, wr, w_next); } /* * Create decrypted provider. */ pp = g_new_providerf(gp, "%s%s", bpp->name, G_ELI_SUFFIX); pp->mediasize = sc->sc_mediasize; pp->sectorsize = sc->sc_sectorsize; LIST_FOREACH(gap, &bpp->aliases, ga_next) g_provider_add_alias(pp, "%s%s", gap->ga_alias, G_ELI_SUFFIX); g_error_provider(pp, 0); G_ELI_DEBUG(0, "Device %s created.", pp->name); G_ELI_DEBUG(0, "Encryption: %s %u", g_eli_algo2str(sc->sc_ealgo), sc->sc_ekeylen); if (sc->sc_flags & G_ELI_FLAG_AUTH) G_ELI_DEBUG(0, " Integrity: %s", g_eli_algo2str(sc->sc_aalgo)); G_ELI_DEBUG(0, " Crypto: %s", sc->sc_crypto == G_ELI_CRYPTO_SW_ACCEL ? "accelerated software" : sc->sc_crypto == G_ELI_CRYPTO_SW ? "software" : "hardware"); return (gp); failed: mtx_lock(&sc->sc_queue_mtx); sc->sc_flags |= G_ELI_FLAG_DESTROY; wakeup(sc); /* * Wait for kernel threads self destruction. */ while (!LIST_EMPTY(&sc->sc_workers)) { msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO, "geli:destroy", 0); } mtx_destroy(&sc->sc_queue_mtx); if (cp->provider != NULL) { if (cp->acr == 1) g_access(cp, -1, -dcw, -1); g_detach(cp); } g_destroy_consumer(cp); g_destroy_geom(gp); g_eli_key_destroy(sc); bzero(sc, sizeof(*sc)); free(sc, M_ELI); return (NULL); } int g_eli_destroy(struct g_eli_softc *sc, boolean_t force) { struct g_geom *gp; struct g_provider *pp; g_topology_assert(); if (sc == NULL) return (ENXIO); gp = sc->sc_geom; pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_ELI_DEBUG(1, "Device %s is still open, so it " "cannot be definitely removed.", pp->name); sc->sc_flags |= G_ELI_FLAG_RW_DETACH; gp->access = g_eli_access; g_wither_provider(pp, ENXIO); return (EBUSY); } else { G_ELI_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } mtx_lock(&sc->sc_queue_mtx); sc->sc_flags |= G_ELI_FLAG_DESTROY; wakeup(sc); while (!LIST_EMPTY(&sc->sc_workers)) { msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO, "geli:destroy", 0); } mtx_destroy(&sc->sc_queue_mtx); gp->softc = NULL; g_eli_key_destroy(sc); bzero(sc, sizeof(*sc)); free(sc, M_ELI); G_ELI_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom_close(gp, ENXIO); return (0); } static int g_eli_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_eli_softc *sc; sc = gp->softc; return (g_eli_destroy(sc, FALSE)); } static int g_eli_keyfiles_load(struct hmac_ctx *ctx, const char *provider) { u_char *keyfile, *data; char *file, name[64]; size_t size; int i; for (i = 0; ; i++) { snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i); keyfile = preload_search_by_type(name); if (keyfile == NULL && i == 0) { /* * If there is only one keyfile, allow simpler name. */ snprintf(name, sizeof(name), "%s:geli_keyfile", provider); keyfile = preload_search_by_type(name); } if (keyfile == NULL) return (i); /* Return number of loaded keyfiles. */ data = preload_fetch_addr(keyfile); if (data == NULL) { G_ELI_DEBUG(0, "Cannot find key file data for %s.", name); return (0); } size = preload_fetch_size(keyfile); if (size == 0) { G_ELI_DEBUG(0, "Cannot find key file size for %s.", name); return (0); } file = preload_search_info(keyfile, MODINFO_NAME); if (file == NULL) { G_ELI_DEBUG(0, "Cannot find key file name for %s.", name); return (0); } G_ELI_DEBUG(1, "Loaded keyfile %s for %s (type: %s).", file, provider, name); g_eli_crypto_hmac_update(ctx, data, size); } } static void g_eli_keyfiles_clear(const char *provider) { u_char *keyfile, *data; char name[64]; size_t size; int i; for (i = 0; ; i++) { snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i); keyfile = preload_search_by_type(name); if (keyfile == NULL) return; data = preload_fetch_addr(keyfile); size = preload_fetch_size(keyfile); if (data != NULL && size != 0) bzero(data, size); } } /* * Tasting is only made on boot. * We detect providers which should be attached before root is mounted. */ static struct g_geom * g_eli_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_eli_metadata md; struct g_geom *gp; struct hmac_ctx ctx; char passphrase[256]; u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN]; u_int i, nkey, nkeyfiles, tries, showpass; int error; struct keybuf *keybuf; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); if (root_mounted() || g_eli_tries == 0) return (NULL); G_ELI_DEBUG(3, "Tasting %s.", pp->name); error = g_eli_read_metadata(mp, pp, &md); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_ELI_MAGIC) != 0) return (NULL); if (md.md_version > G_ELI_VERSION) { printf("geom_eli.ko module is too old to handle %s.\n", pp->name); return (NULL); } if (md.md_provsize != pp->mediasize) return (NULL); /* Should we attach it on boot? */ if (!(md.md_flags & G_ELI_FLAG_BOOT) && !(md.md_flags & G_ELI_FLAG_GELIBOOT)) return (NULL); if (md.md_keys == 0x00) { G_ELI_DEBUG(0, "No valid keys on %s.", pp->name); return (NULL); } if (!eli_metadata_crypto_supported(&md)) { G_ELI_DEBUG(0, "%s uses invalid or unsupported algorithms\n", pp->name); return (NULL); } if (md.md_iterations == -1) { /* If there is no passphrase, we try only once. */ tries = 1; } else { /* Ask for the passphrase no more than g_eli_tries times. */ tries = g_eli_tries; } if ((keybuf = get_keybuf()) != NULL) { /* Scan the key buffer, try all GELI keys. */ for (i = 0; i < keybuf->kb_nents; i++) { if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) { memcpy(key, keybuf->kb_ents[i].ke_data, sizeof(key)); if (g_eli_mkey_decrypt_any(&md, key, mkey, &nkey) == 0 ) { explicit_bzero(key, sizeof(key)); goto have_key; } } } } for (i = 0; i <= tries; i++) { g_eli_crypto_hmac_init(&ctx, NULL, 0); /* * Load all key files. */ nkeyfiles = g_eli_keyfiles_load(&ctx, pp->name); if (nkeyfiles == 0 && md.md_iterations == -1) { /* * No key files and no passphrase, something is * definitely wrong here. * geli(8) doesn't allow for such situation, so assume * that there was really no passphrase and in that case * key files are no properly defined in loader.conf. */ G_ELI_DEBUG(0, "Found no key files in loader.conf for %s.", pp->name); return (NULL); } /* Ask for the passphrase if defined. */ if (md.md_iterations >= 0) { /* Try first with cached passphrase. */ if (i == 0) { if (!g_eli_boot_passcache) continue; memcpy(passphrase, cached_passphrase, sizeof(passphrase)); } else { printf("Enter passphrase for %s: ", pp->name); showpass = g_eli_visible_passphrase; if ((md.md_flags & G_ELI_FLAG_GELIDISPLAYPASS) != 0) showpass = GETS_ECHOPASS; cngets(passphrase, sizeof(passphrase), showpass); memcpy(cached_passphrase, passphrase, sizeof(passphrase)); } } /* * Prepare Derived-Key from the user passphrase. */ if (md.md_iterations == 0) { g_eli_crypto_hmac_update(&ctx, md.md_salt, sizeof(md.md_salt)); g_eli_crypto_hmac_update(&ctx, passphrase, strlen(passphrase)); explicit_bzero(passphrase, sizeof(passphrase)); } else if (md.md_iterations > 0) { u_char dkey[G_ELI_USERKEYLEN]; pkcs5v2_genkey(dkey, sizeof(dkey), md.md_salt, sizeof(md.md_salt), passphrase, md.md_iterations); bzero(passphrase, sizeof(passphrase)); g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey)); explicit_bzero(dkey, sizeof(dkey)); } g_eli_crypto_hmac_final(&ctx, key, 0); /* * Decrypt Master-Key. */ error = g_eli_mkey_decrypt_any(&md, key, mkey, &nkey); bzero(key, sizeof(key)); if (error == -1) { if (i == tries) { G_ELI_DEBUG(0, "Wrong key for %s. No tries left.", pp->name); g_eli_keyfiles_clear(pp->name); return (NULL); } if (i > 0) { G_ELI_DEBUG(0, "Wrong key for %s. Tries left: %u.", pp->name, tries - i); } /* Try again. */ continue; } else if (error > 0) { G_ELI_DEBUG(0, "Cannot decrypt Master Key for %s (error=%d).", pp->name, error); g_eli_keyfiles_clear(pp->name); return (NULL); } g_eli_keyfiles_clear(pp->name); G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name); break; } have_key: /* * We have correct key, let's attach provider. */ gp = g_eli_create(NULL, mp, pp, &md, mkey, nkey); bzero(mkey, sizeof(mkey)); bzero(&md, sizeof(md)); if (gp == NULL) { G_ELI_DEBUG(0, "Cannot create device %s%s.", pp->name, G_ELI_SUFFIX); return (NULL); } return (gp); } static void g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_eli_softc *sc; g_topology_assert(); sc = gp->softc; if (sc == NULL) return; if (pp != NULL || cp != NULL) return; /* Nothing here. */ sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_ekeys_total); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_ekeys_allocated); sbuf_printf(sb, "%s", indent); if (sc->sc_flags == 0) sbuf_cat(sb, "NONE"); else { int first = 1; #define ADD_FLAG(flag, name) do { \ if (sc->sc_flags & (flag)) { \ if (!first) \ sbuf_cat(sb, ", "); \ else \ first = 0; \ sbuf_cat(sb, name); \ } \ } while (0) ADD_FLAG(G_ELI_FLAG_SUSPEND, "SUSPEND"); ADD_FLAG(G_ELI_FLAG_SINGLE_KEY, "SINGLE-KEY"); ADD_FLAG(G_ELI_FLAG_NATIVE_BYTE_ORDER, "NATIVE-BYTE-ORDER"); ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME"); ADD_FLAG(G_ELI_FLAG_BOOT, "BOOT"); ADD_FLAG(G_ELI_FLAG_WO_DETACH, "W-DETACH"); ADD_FLAG(G_ELI_FLAG_RW_DETACH, "RW-DETACH"); ADD_FLAG(G_ELI_FLAG_AUTH, "AUTH"); ADD_FLAG(G_ELI_FLAG_WOPEN, "W-OPEN"); ADD_FLAG(G_ELI_FLAG_DESTROY, "DESTROY"); ADD_FLAG(G_ELI_FLAG_RO, "READ-ONLY"); ADD_FLAG(G_ELI_FLAG_NODELETE, "NODELETE"); ADD_FLAG(G_ELI_FLAG_GELIBOOT, "GELIBOOT"); ADD_FLAG(G_ELI_FLAG_GELIDISPLAYPASS, "GELIDISPLAYPASS"); ADD_FLAG(G_ELI_FLAG_AUTORESIZE, "AUTORESIZE"); #undef ADD_FLAG } sbuf_cat(sb, "\n"); if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) { sbuf_printf(sb, "%s%u\n", indent, sc->sc_nkey); } sbuf_printf(sb, "%s%u\n", indent, sc->sc_version); sbuf_printf(sb, "%s", indent); switch (sc->sc_crypto) { case G_ELI_CRYPTO_HW: sbuf_cat(sb, "hardware"); break; case G_ELI_CRYPTO_SW: sbuf_cat(sb, "software"); break; case G_ELI_CRYPTO_SW_ACCEL: sbuf_cat(sb, "accelerated software"); break; default: sbuf_cat(sb, "UNKNOWN"); break; } sbuf_cat(sb, "\n"); if (sc->sc_flags & G_ELI_FLAG_AUTH) { sbuf_printf(sb, "%s%s\n", indent, g_eli_algo2str(sc->sc_aalgo)); } sbuf_printf(sb, "%s%u\n", indent, sc->sc_ekeylen); sbuf_printf(sb, "%s%s\n", indent, g_eli_algo2str(sc->sc_ealgo)); sbuf_printf(sb, "%s%s\n", indent, (sc->sc_flags & G_ELI_FLAG_SUSPEND) ? "SUSPENDED" : "ACTIVE"); } static void g_eli_shutdown_pre_sync(void *arg, int howto) { struct g_class *mp; struct g_geom *gp, *gp2; struct g_provider *pp; struct g_eli_softc *sc; int error; mp = arg; g_topology_lock(); LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { sc = gp->softc; if (sc == NULL) continue; pp = LIST_FIRST(&gp->provider); KASSERT(pp != NULL, ("No provider? gp=%p (%s)", gp, gp->name)); if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0 || SCHEDULER_STOPPED()) { sc->sc_flags |= G_ELI_FLAG_RW_DETACH; gp->access = g_eli_access; } else { error = g_eli_destroy(sc, TRUE); } } g_topology_unlock(); } static void g_eli_init(struct g_class *mp) { g_eli_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync, g_eli_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST); if (g_eli_pre_sync == NULL) G_ELI_DEBUG(0, "Warning! Cannot register shutdown event."); } static void g_eli_fini(struct g_class *mp) { if (g_eli_pre_sync != NULL) EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_eli_pre_sync); } DECLARE_GEOM_CLASS(g_eli_class, g_eli); MODULE_DEPEND(g_eli, crypto, 1, 1, 1); MODULE_VERSION(geom_eli, 0); Index: head/sys/geom/eli/g_eli_ctl.c =================================================================== --- head/sys/geom/eli/g_eli_ctl.c (revision 362623) +++ head/sys/geom/eli/g_eli_ctl.c (revision 362624) @@ -1,1235 +1,1232 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005-2011 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DECLARE(M_ELI); static void g_eli_ctl_attach(struct gctl_req *req, struct g_class *mp) { struct g_eli_metadata md; struct g_provider *pp; const char *name; u_char *key, mkey[G_ELI_DATAIVKEYLEN]; int *nargs, *detach, *readonly, *dryrunp; int keysize, error, nkey, dryrun, dummy; intmax_t *valp; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs != 1) { gctl_error(req, "Invalid number of arguments."); return; } detach = gctl_get_paraml(req, "detach", sizeof(*detach)); if (detach == NULL) { gctl_error(req, "No '%s' argument.", "detach"); return; } /* "keyno" is optional for backward compatibility */ nkey = -1; valp = gctl_get_param(req, "keyno", &dummy); if (valp != NULL) { valp = gctl_get_paraml(req, "keyno", sizeof(*valp)); if (valp != NULL) nkey = *valp; } if (nkey < -1 || nkey >= G_ELI_MAXMKEYS) { gctl_error(req, "Invalid '%s' argument.", "keyno"); return; } readonly = gctl_get_paraml(req, "readonly", sizeof(*readonly)); if (readonly == NULL) { gctl_error(req, "No '%s' argument.", "readonly"); return; } /* "dryrun" is optional for backward compatibility */ dryrun = 0; dryrunp = gctl_get_param(req, "dryrun", &dummy); if (dryrunp != NULL) { dryrunp = gctl_get_paraml(req, "dryrun", sizeof(*dryrunp)); if (dryrunp != NULL) dryrun = *dryrunp; } if (*detach && *readonly) { gctl_error(req, "Options -d and -r are mutually exclusive."); return; } name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { gctl_error(req, "Provider %s is invalid.", name); return; } error = g_eli_read_metadata(mp, pp, &md); if (error != 0) { gctl_error(req, "Cannot read metadata from %s (error=%d).", name, error); return; } if (md.md_keys == 0x00) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "No valid keys on %s.", pp->name); return; } if (!eli_metadata_crypto_supported(&md)) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "Invalid or unsupported algorithms."); return; } key = gctl_get_param(req, "key", &keysize); if (key == NULL || keysize != G_ELI_USERKEYLEN) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "No '%s' argument.", "key"); return; } if (nkey == -1) error = g_eli_mkey_decrypt_any(&md, key, mkey, &nkey); else error = g_eli_mkey_decrypt(&md, key, mkey, nkey); explicit_bzero(key, keysize); if (error == -1) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "Wrong key for %s.", pp->name); return; } else if (error > 0) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "Cannot decrypt Master Key for %s (error=%d).", pp->name, error); return; } G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name); if (*detach) md.md_flags |= G_ELI_FLAG_WO_DETACH; if (*readonly) md.md_flags |= G_ELI_FLAG_RO; if (!dryrun) g_eli_create(req, mp, pp, &md, mkey, nkey); explicit_bzero(mkey, sizeof(mkey)); explicit_bzero(&md, sizeof(md)); } static struct g_eli_softc * g_eli_find_device(struct g_class *mp, const char *prov) { struct g_eli_softc *sc; struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; if (strncmp(prov, "/dev/", strlen("/dev/")) == 0) prov += strlen("/dev/"); LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; pp = LIST_FIRST(&gp->provider); if (pp != NULL && strcmp(pp->name, prov) == 0) return (sc); cp = LIST_FIRST(&gp->consumer); if (cp != NULL && cp->provider != NULL && strcmp(cp->provider->name, prov) == 0) { return (sc); } } return (NULL); } static void g_eli_ctl_detach(struct gctl_req *req, struct g_class *mp) { struct g_eli_softc *sc; int *force, *last, *nargs, error; const char *prov; char param[16]; int i; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No '%s' argument.", "force"); return; } last = gctl_get_paraml(req, "last", sizeof(*last)); if (last == NULL) { gctl_error(req, "No '%s' argument.", "last"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); prov = gctl_get_asciiparam(req, param); if (prov == NULL) { gctl_error(req, "No 'arg%d' argument.", i); return; } sc = g_eli_find_device(mp, prov); if (sc == NULL) { gctl_error(req, "No such device: %s.", prov); return; } if (*last) { sc->sc_flags |= G_ELI_FLAG_RW_DETACH; sc->sc_geom->access = g_eli_access; } else { error = g_eli_destroy(sc, *force ? TRUE : FALSE); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", sc->sc_name, error); return; } } } } static void g_eli_ctl_onetime(struct gctl_req *req, struct g_class *mp) { struct g_eli_metadata md; struct g_provider *pp; const char *name; intmax_t *keylen, *sectorsize; u_char mkey[G_ELI_DATAIVKEYLEN]; int *nargs, *detach, *noautoresize, *notrim; g_topology_assert(); bzero(&md, sizeof(md)); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs != 1) { gctl_error(req, "Invalid number of arguments."); return; } strlcpy(md.md_magic, G_ELI_MAGIC, sizeof(md.md_magic)); md.md_version = G_ELI_VERSION; md.md_flags |= G_ELI_FLAG_ONETIME; md.md_flags |= G_ELI_FLAG_AUTORESIZE; detach = gctl_get_paraml(req, "detach", sizeof(*detach)); if (detach != NULL && *detach) md.md_flags |= G_ELI_FLAG_WO_DETACH; noautoresize = gctl_get_paraml(req, "noautoresize", sizeof(*noautoresize)); if (noautoresize != NULL && *noautoresize) md.md_flags &= ~G_ELI_FLAG_AUTORESIZE; notrim = gctl_get_paraml(req, "notrim", sizeof(*notrim)); if (notrim != NULL && *notrim) md.md_flags |= G_ELI_FLAG_NODELETE; md.md_ealgo = CRYPTO_ALGORITHM_MIN - 1; name = gctl_get_asciiparam(req, "aalgo"); if (name == NULL) { gctl_error(req, "No '%s' argument.", "aalgo"); return; } if (*name != '\0') { md.md_aalgo = g_eli_str2aalgo(name); if (md.md_aalgo >= CRYPTO_ALGORITHM_MIN && md.md_aalgo <= CRYPTO_ALGORITHM_MAX) { md.md_flags |= G_ELI_FLAG_AUTH; } else { /* * For backward compatibility, check if the -a option * was used to provide encryption algorithm. */ md.md_ealgo = g_eli_str2ealgo(name); if (md.md_ealgo < CRYPTO_ALGORITHM_MIN || md.md_ealgo > CRYPTO_ALGORITHM_MAX) { gctl_error(req, "Invalid authentication algorithm."); return; } else { gctl_error(req, "warning: The -e option, not " "the -a option is now used to specify " "encryption algorithm to use."); } } } if (md.md_ealgo < CRYPTO_ALGORITHM_MIN || md.md_ealgo > CRYPTO_ALGORITHM_MAX) { name = gctl_get_asciiparam(req, "ealgo"); if (name == NULL) { gctl_error(req, "No '%s' argument.", "ealgo"); return; } md.md_ealgo = g_eli_str2ealgo(name); if (md.md_ealgo < CRYPTO_ALGORITHM_MIN || md.md_ealgo > CRYPTO_ALGORITHM_MAX) { gctl_error(req, "Invalid encryption algorithm."); return; } } keylen = gctl_get_paraml(req, "keylen", sizeof(*keylen)); if (keylen == NULL) { gctl_error(req, "No '%s' argument.", "keylen"); return; } md.md_keylen = g_eli_keylen(md.md_ealgo, *keylen); if (md.md_keylen == 0) { gctl_error(req, "Invalid '%s' argument.", "keylen"); return; } /* Not important here. */ md.md_provsize = 0; /* Not important here. */ bzero(md.md_salt, sizeof(md.md_salt)); md.md_keys = 0x01; arc4rand(mkey, sizeof(mkey), 0); /* Not important here. */ bzero(md.md_hash, sizeof(md.md_hash)); name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { gctl_error(req, "Provider %s is invalid.", name); return; } sectorsize = gctl_get_paraml(req, "sectorsize", sizeof(*sectorsize)); if (sectorsize == NULL) { gctl_error(req, "No '%s' argument.", "sectorsize"); return; } if (*sectorsize == 0) md.md_sectorsize = pp->sectorsize; else { if (*sectorsize < 0 || (*sectorsize % pp->sectorsize) != 0) { gctl_error(req, "Invalid sector size."); return; } if (*sectorsize > PAGE_SIZE) { gctl_error(req, "warning: Using sectorsize bigger than " "the page size!"); } md.md_sectorsize = *sectorsize; } g_eli_create(req, mp, pp, &md, mkey, -1); explicit_bzero(mkey, sizeof(mkey)); explicit_bzero(&md, sizeof(md)); } static void g_eli_ctl_configure(struct gctl_req *req, struct g_class *mp) { struct g_eli_softc *sc; struct g_eli_metadata md; struct g_provider *pp; struct g_consumer *cp; char param[16]; const char *prov; u_char *sector; int *nargs, *boot, *noboot, *trim, *notrim, *geliboot, *nogeliboot; int *displaypass, *nodisplaypass, *autoresize, *noautoresize; int zero, error, changed; u_int i; g_topology_assert(); changed = 0; zero = 0; nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } boot = gctl_get_paraml(req, "boot", sizeof(*boot)); if (boot == NULL) boot = &zero; noboot = gctl_get_paraml(req, "noboot", sizeof(*noboot)); if (noboot == NULL) noboot = &zero; if (*boot && *noboot) { gctl_error(req, "Options -b and -B are mutually exclusive."); return; } if (*boot || *noboot) changed = 1; trim = gctl_get_paraml(req, "trim", sizeof(*trim)); if (trim == NULL) trim = &zero; notrim = gctl_get_paraml(req, "notrim", sizeof(*notrim)); if (notrim == NULL) notrim = &zero; if (*trim && *notrim) { gctl_error(req, "Options -t and -T are mutually exclusive."); return; } if (*trim || *notrim) changed = 1; geliboot = gctl_get_paraml(req, "geliboot", sizeof(*geliboot)); if (geliboot == NULL) geliboot = &zero; nogeliboot = gctl_get_paraml(req, "nogeliboot", sizeof(*nogeliboot)); if (nogeliboot == NULL) nogeliboot = &zero; if (*geliboot && *nogeliboot) { gctl_error(req, "Options -g and -G are mutually exclusive."); return; } if (*geliboot || *nogeliboot) changed = 1; displaypass = gctl_get_paraml(req, "displaypass", sizeof(*displaypass)); if (displaypass == NULL) displaypass = &zero; nodisplaypass = gctl_get_paraml(req, "nodisplaypass", sizeof(*nodisplaypass)); if (nodisplaypass == NULL) nodisplaypass = &zero; if (*displaypass && *nodisplaypass) { gctl_error(req, "Options -d and -D are mutually exclusive."); return; } if (*displaypass || *nodisplaypass) changed = 1; autoresize = gctl_get_paraml(req, "autoresize", sizeof(*autoresize)); if (autoresize == NULL) autoresize = &zero; noautoresize = gctl_get_paraml(req, "noautoresize", sizeof(*noautoresize)); if (noautoresize == NULL) noautoresize = &zero; if (*autoresize && *noautoresize) { gctl_error(req, "Options -r and -R are mutually exclusive."); return; } if (*autoresize || *noautoresize) changed = 1; if (!changed) { gctl_error(req, "No option given."); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); prov = gctl_get_asciiparam(req, param); if (prov == NULL) { gctl_error(req, "No 'arg%d' argument.", i); return; } sc = g_eli_find_device(mp, prov); if (sc == NULL) { /* * We ignore not attached providers, userland part will * take care of them. */ G_ELI_DEBUG(1, "Skipping configuration of not attached " "provider %s.", prov); continue; } if (sc->sc_flags & G_ELI_FLAG_RO) { gctl_error(req, "Cannot change configuration of " "read-only provider %s.", prov); continue; } if (*boot && (sc->sc_flags & G_ELI_FLAG_BOOT)) { G_ELI_DEBUG(1, "BOOT flag already configured for %s.", prov); continue; } else if (*noboot && !(sc->sc_flags & G_ELI_FLAG_BOOT)) { G_ELI_DEBUG(1, "BOOT flag not configured for %s.", prov); continue; } if (*notrim && (sc->sc_flags & G_ELI_FLAG_NODELETE)) { G_ELI_DEBUG(1, "TRIM disable flag already configured for %s.", prov); continue; } else if (*trim && !(sc->sc_flags & G_ELI_FLAG_NODELETE)) { G_ELI_DEBUG(1, "TRIM disable flag not configured for %s.", prov); continue; } if (*geliboot && (sc->sc_flags & G_ELI_FLAG_GELIBOOT)) { G_ELI_DEBUG(1, "GELIBOOT flag already configured for %s.", prov); continue; } else if (*nogeliboot && !(sc->sc_flags & G_ELI_FLAG_GELIBOOT)) { G_ELI_DEBUG(1, "GELIBOOT flag not configured for %s.", prov); continue; } if (*displaypass && (sc->sc_flags & G_ELI_FLAG_GELIDISPLAYPASS)) { G_ELI_DEBUG(1, "GELIDISPLAYPASS flag already configured for %s.", prov); continue; } else if (*nodisplaypass && !(sc->sc_flags & G_ELI_FLAG_GELIDISPLAYPASS)) { G_ELI_DEBUG(1, "GELIDISPLAYPASS flag not configured for %s.", prov); continue; } if (*autoresize && (sc->sc_flags & G_ELI_FLAG_AUTORESIZE)) { G_ELI_DEBUG(1, "AUTORESIZE flag already configured for %s.", prov); continue; } else if (*noautoresize && !(sc->sc_flags & G_ELI_FLAG_AUTORESIZE)) { G_ELI_DEBUG(1, "AUTORESIZE flag not configured for %s.", prov); continue; } if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) { /* * ONETIME providers don't write metadata to * disk, so don't try reading it. This means * we're bit-flipping uninitialized memory in md * below, but that's OK; we don't do anything * with it later. */ cp = LIST_FIRST(&sc->sc_geom->consumer); pp = cp->provider; error = g_eli_read_metadata(mp, pp, &md); if (error != 0) { gctl_error(req, "Cannot read metadata from %s (error=%d).", prov, error); continue; } } if (*boot) { md.md_flags |= G_ELI_FLAG_BOOT; sc->sc_flags |= G_ELI_FLAG_BOOT; } else if (*noboot) { md.md_flags &= ~G_ELI_FLAG_BOOT; sc->sc_flags &= ~G_ELI_FLAG_BOOT; } if (*notrim) { md.md_flags |= G_ELI_FLAG_NODELETE; sc->sc_flags |= G_ELI_FLAG_NODELETE; } else if (*trim) { md.md_flags &= ~G_ELI_FLAG_NODELETE; sc->sc_flags &= ~G_ELI_FLAG_NODELETE; } if (*geliboot) { md.md_flags |= G_ELI_FLAG_GELIBOOT; sc->sc_flags |= G_ELI_FLAG_GELIBOOT; } else if (*nogeliboot) { md.md_flags &= ~G_ELI_FLAG_GELIBOOT; sc->sc_flags &= ~G_ELI_FLAG_GELIBOOT; } if (*displaypass) { md.md_flags |= G_ELI_FLAG_GELIDISPLAYPASS; sc->sc_flags |= G_ELI_FLAG_GELIDISPLAYPASS; } else if (*nodisplaypass) { md.md_flags &= ~G_ELI_FLAG_GELIDISPLAYPASS; sc->sc_flags &= ~G_ELI_FLAG_GELIDISPLAYPASS; } if (*autoresize) { md.md_flags |= G_ELI_FLAG_AUTORESIZE; sc->sc_flags |= G_ELI_FLAG_AUTORESIZE; } else if (*noautoresize) { md.md_flags &= ~G_ELI_FLAG_AUTORESIZE; sc->sc_flags &= ~G_ELI_FLAG_AUTORESIZE; } if (sc->sc_flags & G_ELI_FLAG_ONETIME) { /* There's no metadata on disk so we are done here. */ continue; } sector = malloc(pp->sectorsize, M_ELI, M_WAITOK | M_ZERO); eli_metadata_encode(&md, sector); error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector, pp->sectorsize); if (error != 0) { gctl_error(req, "Cannot store metadata on %s (error=%d).", prov, error); } explicit_bzero(&md, sizeof(md)); - explicit_bzero(sector, pp->sectorsize); - free(sector, M_ELI); + zfree(sector, M_ELI); } } static void g_eli_ctl_setkey(struct gctl_req *req, struct g_class *mp) { struct g_eli_softc *sc; struct g_eli_metadata md; struct g_provider *pp; struct g_consumer *cp; const char *name; u_char *key, *mkeydst, *sector; intmax_t *valp; int keysize, nkey, error; g_topology_assert(); name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } key = gctl_get_param(req, "key", &keysize); if (key == NULL || keysize != G_ELI_USERKEYLEN) { gctl_error(req, "No '%s' argument.", "key"); return; } sc = g_eli_find_device(mp, name); if (sc == NULL) { gctl_error(req, "Provider %s is invalid.", name); return; } if (sc->sc_flags & G_ELI_FLAG_RO) { gctl_error(req, "Cannot change keys for read-only provider."); return; } cp = LIST_FIRST(&sc->sc_geom->consumer); pp = cp->provider; error = g_eli_read_metadata(mp, pp, &md); if (error != 0) { gctl_error(req, "Cannot read metadata from %s (error=%d).", name, error); return; } valp = gctl_get_paraml(req, "keyno", sizeof(*valp)); if (valp == NULL) { gctl_error(req, "No '%s' argument.", "keyno"); return; } if (*valp != -1) nkey = *valp; else nkey = sc->sc_nkey; if (nkey < 0 || nkey >= G_ELI_MAXMKEYS) { gctl_error(req, "Invalid '%s' argument.", "keyno"); return; } valp = gctl_get_paraml(req, "iterations", sizeof(*valp)); if (valp == NULL) { gctl_error(req, "No '%s' argument.", "iterations"); return; } /* Check if iterations number should and can be changed. */ if (*valp != -1 && md.md_iterations == -1) { md.md_iterations = *valp; } else if (*valp != -1 && *valp != md.md_iterations) { if (bitcount32(md.md_keys) != 1) { gctl_error(req, "To be able to use '-i' option, only " "one key can be defined."); return; } if (md.md_keys != (1 << nkey)) { gctl_error(req, "Only already defined key can be " "changed when '-i' option is used."); return; } md.md_iterations = *valp; } mkeydst = md.md_mkeys + nkey * G_ELI_MKEYLEN; md.md_keys |= (1 << nkey); bcopy(sc->sc_mkey, mkeydst, sizeof(sc->sc_mkey)); /* Encrypt Master Key with the new key. */ error = g_eli_mkey_encrypt(md.md_ealgo, key, md.md_keylen, mkeydst); explicit_bzero(key, keysize); if (error != 0) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "Cannot encrypt Master Key (error=%d).", error); return; } sector = malloc(pp->sectorsize, M_ELI, M_WAITOK | M_ZERO); /* Store metadata with fresh key. */ eli_metadata_encode(&md, sector); explicit_bzero(&md, sizeof(md)); error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector, pp->sectorsize); - explicit_bzero(sector, pp->sectorsize); - free(sector, M_ELI); + zfree(sector, M_ELI); if (error != 0) { gctl_error(req, "Cannot store metadata on %s (error=%d).", pp->name, error); return; } G_ELI_DEBUG(1, "Key %u changed on %s.", nkey, pp->name); } static void g_eli_ctl_delkey(struct gctl_req *req, struct g_class *mp) { struct g_eli_softc *sc; struct g_eli_metadata md; struct g_provider *pp; struct g_consumer *cp; const char *name; u_char *mkeydst, *sector; intmax_t *valp; size_t keysize; int error, nkey, *all, *force; u_int i; g_topology_assert(); nkey = 0; /* fixes causeless gcc warning */ name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } sc = g_eli_find_device(mp, name); if (sc == NULL) { gctl_error(req, "Provider %s is invalid.", name); return; } if (sc->sc_flags & G_ELI_FLAG_RO) { gctl_error(req, "Cannot delete keys for read-only provider."); return; } cp = LIST_FIRST(&sc->sc_geom->consumer); pp = cp->provider; error = g_eli_read_metadata(mp, pp, &md); if (error != 0) { gctl_error(req, "Cannot read metadata from %s (error=%d).", name, error); return; } all = gctl_get_paraml(req, "all", sizeof(*all)); if (all == NULL) { gctl_error(req, "No '%s' argument.", "all"); return; } if (*all) { mkeydst = md.md_mkeys; keysize = sizeof(md.md_mkeys); } else { force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No '%s' argument.", "force"); return; } valp = gctl_get_paraml(req, "keyno", sizeof(*valp)); if (valp == NULL) { gctl_error(req, "No '%s' argument.", "keyno"); return; } if (*valp != -1) nkey = *valp; else nkey = sc->sc_nkey; if (nkey < 0 || nkey >= G_ELI_MAXMKEYS) { gctl_error(req, "Invalid '%s' argument.", "keyno"); return; } if (!(md.md_keys & (1 << nkey)) && !*force) { gctl_error(req, "Master Key %u is not set.", nkey); return; } md.md_keys &= ~(1 << nkey); if (md.md_keys == 0 && !*force) { gctl_error(req, "This is the last Master Key. Use '-f' " "flag if you really want to remove it."); return; } mkeydst = md.md_mkeys + nkey * G_ELI_MKEYLEN; keysize = G_ELI_MKEYLEN; } sector = malloc(pp->sectorsize, M_ELI, M_WAITOK | M_ZERO); for (i = 0; i <= g_eli_overwrites; i++) { if (i == g_eli_overwrites) explicit_bzero(mkeydst, keysize); else arc4rand(mkeydst, keysize, 0); /* Store metadata with destroyed key. */ eli_metadata_encode(&md, sector); error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector, pp->sectorsize); if (error != 0) { G_ELI_DEBUG(0, "Cannot store metadata on %s " "(error=%d).", pp->name, error); } /* * Flush write cache so we don't overwrite data N times in cache * and only once on disk. */ (void)g_io_flush(cp); } explicit_bzero(&md, sizeof(md)); - explicit_bzero(sector, pp->sectorsize); - free(sector, M_ELI); + zfree(sector, M_ELI); if (*all) G_ELI_DEBUG(1, "All keys removed from %s.", pp->name); else G_ELI_DEBUG(1, "Key %d removed from %s.", nkey, pp->name); } static void g_eli_suspend_one(struct g_eli_softc *sc, struct gctl_req *req) { struct g_eli_worker *wr; g_topology_assert(); KASSERT(sc != NULL, ("NULL sc")); if (sc->sc_flags & G_ELI_FLAG_ONETIME) { gctl_error(req, "Device %s is using one-time key, suspend not supported.", sc->sc_name); return; } mtx_lock(&sc->sc_queue_mtx); if (sc->sc_flags & G_ELI_FLAG_SUSPEND) { mtx_unlock(&sc->sc_queue_mtx); gctl_error(req, "Device %s already suspended.", sc->sc_name); return; } sc->sc_flags |= G_ELI_FLAG_SUSPEND; wakeup(sc); for (;;) { LIST_FOREACH(wr, &sc->sc_workers, w_next) { if (wr->w_active) break; } if (wr == NULL) break; /* Not all threads suspended. */ msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO, "geli:suspend", 0); } /* * Clear sensitive data on suspend, they will be recovered on resume. */ explicit_bzero(sc->sc_mkey, sizeof(sc->sc_mkey)); g_eli_key_destroy(sc); explicit_bzero(sc->sc_akey, sizeof(sc->sc_akey)); explicit_bzero(&sc->sc_akeyctx, sizeof(sc->sc_akeyctx)); explicit_bzero(sc->sc_ivkey, sizeof(sc->sc_ivkey)); explicit_bzero(&sc->sc_ivctx, sizeof(sc->sc_ivctx)); mtx_unlock(&sc->sc_queue_mtx); G_ELI_DEBUG(0, "Device %s has been suspended.", sc->sc_name); } static void g_eli_ctl_suspend(struct gctl_req *req, struct g_class *mp) { struct g_eli_softc *sc; int *all, *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } all = gctl_get_paraml(req, "all", sizeof(*all)); if (all == NULL) { gctl_error(req, "No '%s' argument.", "all"); return; } if (!*all && *nargs == 0) { gctl_error(req, "Too few arguments."); return; } if (*all) { struct g_geom *gp, *gp2; LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { sc = gp->softc; if (sc->sc_flags & G_ELI_FLAG_ONETIME) { G_ELI_DEBUG(0, "Device %s is using one-time key, suspend not supported, skipping.", sc->sc_name); continue; } g_eli_suspend_one(sc, req); } } else { const char *prov; char param[16]; int i; for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); prov = gctl_get_asciiparam(req, param); if (prov == NULL) { G_ELI_DEBUG(0, "No 'arg%d' argument.", i); continue; } sc = g_eli_find_device(mp, prov); if (sc == NULL) { G_ELI_DEBUG(0, "No such provider: %s.", prov); continue; } g_eli_suspend_one(sc, req); } } } static void g_eli_ctl_resume(struct gctl_req *req, struct g_class *mp) { struct g_eli_metadata md; struct g_eli_softc *sc; struct g_provider *pp; struct g_consumer *cp; const char *name; u_char *key, mkey[G_ELI_DATAIVKEYLEN]; int *nargs, keysize, error; u_int nkey; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs != 1) { gctl_error(req, "Invalid number of arguments."); return; } name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } key = gctl_get_param(req, "key", &keysize); if (key == NULL || keysize != G_ELI_USERKEYLEN) { gctl_error(req, "No '%s' argument.", "key"); return; } sc = g_eli_find_device(mp, name); if (sc == NULL) { gctl_error(req, "Provider %s is invalid.", name); return; } cp = LIST_FIRST(&sc->sc_geom->consumer); pp = cp->provider; error = g_eli_read_metadata(mp, pp, &md); if (error != 0) { gctl_error(req, "Cannot read metadata from %s (error=%d).", name, error); return; } if (md.md_keys == 0x00) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "No valid keys on %s.", pp->name); return; } error = g_eli_mkey_decrypt_any(&md, key, mkey, &nkey); explicit_bzero(key, keysize); if (error == -1) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "Wrong key for %s.", pp->name); return; } else if (error > 0) { explicit_bzero(&md, sizeof(md)); gctl_error(req, "Cannot decrypt Master Key for %s (error=%d).", pp->name, error); return; } G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name); mtx_lock(&sc->sc_queue_mtx); if (!(sc->sc_flags & G_ELI_FLAG_SUSPEND)) gctl_error(req, "Device %s is not suspended.", name); else { /* Restore sc_mkey, sc_ekeys, sc_akey and sc_ivkey. */ g_eli_mkey_propagate(sc, mkey); sc->sc_flags &= ~G_ELI_FLAG_SUSPEND; G_ELI_DEBUG(1, "Resumed %s.", pp->name); wakeup(sc); } mtx_unlock(&sc->sc_queue_mtx); explicit_bzero(mkey, sizeof(mkey)); explicit_bzero(&md, sizeof(md)); } static int g_eli_kill_one(struct g_eli_softc *sc) { struct g_provider *pp; struct g_consumer *cp; int error = 0; g_topology_assert(); if (sc == NULL) return (ENOENT); pp = LIST_FIRST(&sc->sc_geom->provider); g_error_provider(pp, ENXIO); cp = LIST_FIRST(&sc->sc_geom->consumer); pp = cp->provider; if (sc->sc_flags & G_ELI_FLAG_RO) { G_ELI_DEBUG(0, "WARNING: Metadata won't be erased on read-only " "provider: %s.", pp->name); } else { u_char *sector; u_int i; int err; sector = malloc(pp->sectorsize, M_ELI, M_WAITOK); for (i = 0; i <= g_eli_overwrites; i++) { if (i == g_eli_overwrites) bzero(sector, pp->sectorsize); else arc4rand(sector, pp->sectorsize, 0); err = g_write_data(cp, pp->mediasize - pp->sectorsize, sector, pp->sectorsize); if (err != 0) { G_ELI_DEBUG(0, "Cannot erase metadata on %s " "(error=%d).", pp->name, err); if (error == 0) error = err; } /* * Flush write cache so we don't overwrite data N times * in cache and only once on disk. */ (void)g_io_flush(cp); } free(sector, M_ELI); } if (error == 0) G_ELI_DEBUG(0, "%s has been killed.", pp->name); g_eli_destroy(sc, TRUE); return (error); } static void g_eli_ctl_kill(struct gctl_req *req, struct g_class *mp) { int *all, *nargs; int error; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } all = gctl_get_paraml(req, "all", sizeof(*all)); if (all == NULL) { gctl_error(req, "No '%s' argument.", "all"); return; } if (!*all && *nargs == 0) { gctl_error(req, "Too few arguments."); return; } if (*all) { struct g_geom *gp, *gp2; LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { error = g_eli_kill_one(gp->softc); if (error != 0) gctl_error(req, "Not fully done."); } } else { struct g_eli_softc *sc; const char *prov; char param[16]; int i; for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); prov = gctl_get_asciiparam(req, param); if (prov == NULL) { G_ELI_DEBUG(0, "No 'arg%d' argument.", i); continue; } sc = g_eli_find_device(mp, prov); if (sc == NULL) { G_ELI_DEBUG(0, "No such provider: %s.", prov); continue; } error = g_eli_kill_one(sc); if (error != 0) gctl_error(req, "Not fully done."); } } } void g_eli_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } while (*version != G_ELI_VERSION) { if (G_ELI_VERSION == G_ELI_VERSION_06 && *version == G_ELI_VERSION_05) { /* Compatible. */ break; } if (G_ELI_VERSION == G_ELI_VERSION_07 && (*version == G_ELI_VERSION_05 || *version == G_ELI_VERSION_06)) { /* Compatible. */ break; } gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "attach") == 0) g_eli_ctl_attach(req, mp); else if (strcmp(verb, "detach") == 0 || strcmp(verb, "stop") == 0) g_eli_ctl_detach(req, mp); else if (strcmp(verb, "onetime") == 0) g_eli_ctl_onetime(req, mp); else if (strcmp(verb, "configure") == 0) g_eli_ctl_configure(req, mp); else if (strcmp(verb, "setkey") == 0) g_eli_ctl_setkey(req, mp); else if (strcmp(verb, "delkey") == 0) g_eli_ctl_delkey(req, mp); else if (strcmp(verb, "suspend") == 0) g_eli_ctl_suspend(req, mp); else if (strcmp(verb, "resume") == 0) g_eli_ctl_resume(req, mp); else if (strcmp(verb, "kill") == 0) g_eli_ctl_kill(req, mp); else gctl_error(req, "Unknown verb."); } Index: head/sys/geom/eli/g_eli_key_cache.c =================================================================== --- head/sys/geom/eli/g_eli_key_cache.c (revision 362623) +++ head/sys/geom/eli/g_eli_key_cache.c (revision 362624) @@ -1,389 +1,387 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011-2019 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #ifdef _KERNEL #include #include #include #include #endif /* _KERNEL */ #include #include #include #include #ifdef _KERNEL MALLOC_DECLARE(M_ELI); SYSCTL_DECL(_kern_geom_eli); /* * The default limit (8192 keys) will allow to cache all keys for 4TB * provider with 512 bytes sectors and will take around 1MB of memory. */ static u_int g_eli_key_cache_limit = 8192; SYSCTL_UINT(_kern_geom_eli, OID_AUTO, key_cache_limit, CTLFLAG_RDTUN, &g_eli_key_cache_limit, 0, "Maximum number of encryption keys to cache"); static uint64_t g_eli_key_cache_hits; SYSCTL_UQUAD(_kern_geom_eli, OID_AUTO, key_cache_hits, CTLFLAG_RW, &g_eli_key_cache_hits, 0, "Key cache hits"); static uint64_t g_eli_key_cache_misses; SYSCTL_UQUAD(_kern_geom_eli, OID_AUTO, key_cache_misses, CTLFLAG_RW, &g_eli_key_cache_misses, 0, "Key cache misses"); static int g_eli_key_cmp(const struct g_eli_key *a, const struct g_eli_key *b) { if (a->gek_keyno > b->gek_keyno) return (1); else if (a->gek_keyno < b->gek_keyno) return (-1); return (0); } #endif /* _KERNEL */ void g_eli_key_fill(struct g_eli_softc *sc, struct g_eli_key *key, uint64_t keyno) { const uint8_t *ekey; struct { char magic[4]; uint8_t keyno[8]; } __packed hmacdata; if ((sc->sc_flags & G_ELI_FLAG_ENC_IVKEY) != 0) ekey = sc->sc_mkey; else ekey = sc->sc_ekey; bcopy("ekey", hmacdata.magic, 4); le64enc(hmacdata.keyno, keyno); g_eli_crypto_hmac(ekey, G_ELI_MAXKEYLEN, (uint8_t *)&hmacdata, sizeof(hmacdata), key->gek_key, 0); key->gek_keyno = keyno; key->gek_count = 0; key->gek_magic = G_ELI_KEY_MAGIC; } #ifdef _KERNEL RB_PROTOTYPE(g_eli_key_tree, g_eli_key, gek_link, g_eli_key_cmp); RB_GENERATE(g_eli_key_tree, g_eli_key, gek_link, g_eli_key_cmp); static struct g_eli_key * g_eli_key_allocate(struct g_eli_softc *sc, uint64_t keyno) { struct g_eli_key *key, *ekey, keysearch; mtx_assert(&sc->sc_ekeys_lock, MA_OWNED); mtx_unlock(&sc->sc_ekeys_lock); key = malloc(sizeof(*key), M_ELI, M_WAITOK); g_eli_key_fill(sc, key, keyno); mtx_lock(&sc->sc_ekeys_lock); /* * Recheck if the key wasn't added while we weren't holding the lock. */ keysearch.gek_keyno = keyno; ekey = RB_FIND(g_eli_key_tree, &sc->sc_ekeys_tree, &keysearch); if (ekey != NULL) { - explicit_bzero(key, sizeof(*key)); - free(key, M_ELI); + zfree(key, M_ELI); key = ekey; TAILQ_REMOVE(&sc->sc_ekeys_queue, key, gek_next); } else { RB_INSERT(g_eli_key_tree, &sc->sc_ekeys_tree, key); sc->sc_ekeys_allocated++; } TAILQ_INSERT_TAIL(&sc->sc_ekeys_queue, key, gek_next); return (key); } static struct g_eli_key * g_eli_key_find_last(struct g_eli_softc *sc) { struct g_eli_key *key; mtx_assert(&sc->sc_ekeys_lock, MA_OWNED); TAILQ_FOREACH(key, &sc->sc_ekeys_queue, gek_next) { if (key->gek_count == 0) break; } return (key); } static void g_eli_key_replace(struct g_eli_softc *sc, struct g_eli_key *key, uint64_t keyno) { mtx_assert(&sc->sc_ekeys_lock, MA_OWNED); KASSERT(key->gek_magic == G_ELI_KEY_MAGIC, ("Invalid magic.")); RB_REMOVE(g_eli_key_tree, &sc->sc_ekeys_tree, key); TAILQ_REMOVE(&sc->sc_ekeys_queue, key, gek_next); KASSERT(key->gek_count == 0, ("gek_count=%d", key->gek_count)); g_eli_key_fill(sc, key, keyno); RB_INSERT(g_eli_key_tree, &sc->sc_ekeys_tree, key); TAILQ_INSERT_TAIL(&sc->sc_ekeys_queue, key, gek_next); } static void g_eli_key_remove(struct g_eli_softc *sc, struct g_eli_key *key) { mtx_assert(&sc->sc_ekeys_lock, MA_OWNED); KASSERT(key->gek_magic == G_ELI_KEY_MAGIC, ("Invalid magic.")); KASSERT(key->gek_count == 0, ("gek_count=%d", key->gek_count)); RB_REMOVE(g_eli_key_tree, &sc->sc_ekeys_tree, key); TAILQ_REMOVE(&sc->sc_ekeys_queue, key, gek_next); sc->sc_ekeys_allocated--; - explicit_bzero(key, sizeof(*key)); - free(key, M_ELI); + zfree(key, M_ELI); } void g_eli_key_init(struct g_eli_softc *sc) { uint8_t *mkey; mtx_lock(&sc->sc_ekeys_lock); mkey = sc->sc_mkey + sizeof(sc->sc_ivkey); if ((sc->sc_flags & G_ELI_FLAG_AUTH) == 0) bcopy(mkey, sc->sc_ekey, G_ELI_DATAKEYLEN); else { /* * The encryption key is: ekey = HMAC_SHA512(Data-Key, 0x10) */ g_eli_crypto_hmac(mkey, G_ELI_MAXKEYLEN, "\x10", 1, sc->sc_ekey, 0); } if ((sc->sc_flags & G_ELI_FLAG_SINGLE_KEY) != 0) { sc->sc_ekeys_total = 1; sc->sc_ekeys_allocated = 0; } else { off_t mediasize; size_t blocksize; if ((sc->sc_flags & G_ELI_FLAG_AUTH) != 0) { struct g_provider *pp; pp = LIST_FIRST(&sc->sc_geom->consumer)->provider; mediasize = pp->mediasize; blocksize = pp->sectorsize; } else { mediasize = sc->sc_mediasize; blocksize = sc->sc_sectorsize; } sc->sc_ekeys_total = ((mediasize - 1) >> G_ELI_KEY_SHIFT) / blocksize + 1; sc->sc_ekeys_allocated = 0; TAILQ_INIT(&sc->sc_ekeys_queue); RB_INIT(&sc->sc_ekeys_tree); if (sc->sc_ekeys_total <= g_eli_key_cache_limit) { uint64_t keyno; for (keyno = 0; keyno < sc->sc_ekeys_total; keyno++) (void)g_eli_key_allocate(sc, keyno); KASSERT(sc->sc_ekeys_total == sc->sc_ekeys_allocated, ("sc_ekeys_total=%ju != sc_ekeys_allocated=%ju", (uintmax_t)sc->sc_ekeys_total, (uintmax_t)sc->sc_ekeys_allocated)); } } mtx_unlock(&sc->sc_ekeys_lock); } void g_eli_key_destroy(struct g_eli_softc *sc) { mtx_lock(&sc->sc_ekeys_lock); if ((sc->sc_flags & G_ELI_FLAG_SINGLE_KEY) != 0) { explicit_bzero(sc->sc_ekey, sizeof(sc->sc_ekey)); } else { struct g_eli_key *key; while ((key = TAILQ_FIRST(&sc->sc_ekeys_queue)) != NULL) g_eli_key_remove(sc, key); TAILQ_INIT(&sc->sc_ekeys_queue); RB_INIT(&sc->sc_ekeys_tree); } mtx_unlock(&sc->sc_ekeys_lock); } void g_eli_key_resize(struct g_eli_softc *sc) { uint64_t new_ekeys_total; off_t mediasize; size_t blocksize; if ((sc->sc_flags & G_ELI_FLAG_SINGLE_KEY) != 0) { return; } mtx_lock(&sc->sc_ekeys_lock); if ((sc->sc_flags & G_ELI_FLAG_AUTH) != 0) { struct g_provider *pp; pp = LIST_FIRST(&sc->sc_geom->consumer)->provider; mediasize = pp->mediasize; blocksize = pp->sectorsize; } else { mediasize = sc->sc_mediasize; blocksize = sc->sc_sectorsize; } new_ekeys_total = ((mediasize - 1) >> G_ELI_KEY_SHIFT) / blocksize + 1; /* We only allow to grow. */ KASSERT(new_ekeys_total >= sc->sc_ekeys_total, ("new_ekeys_total=%ju < sc_ekeys_total=%ju", (uintmax_t)new_ekeys_total, (uintmax_t)sc->sc_ekeys_total)); if (new_ekeys_total <= g_eli_key_cache_limit) { uint64_t keyno; for (keyno = sc->sc_ekeys_total; keyno < new_ekeys_total; keyno++) { (void)g_eli_key_allocate(sc, keyno); } KASSERT(new_ekeys_total == sc->sc_ekeys_allocated, ("new_ekeys_total=%ju != sc_ekeys_allocated=%ju", (uintmax_t)new_ekeys_total, (uintmax_t)sc->sc_ekeys_allocated)); } sc->sc_ekeys_total = new_ekeys_total; mtx_unlock(&sc->sc_ekeys_lock); } /* * Select encryption key. If G_ELI_FLAG_SINGLE_KEY is present we only have one * key available for all the data. If the flag is not present select the key * based on data offset. */ uint8_t * g_eli_key_hold(struct g_eli_softc *sc, off_t offset, size_t blocksize) { struct g_eli_key *key, keysearch; uint64_t keyno; if ((sc->sc_flags & G_ELI_FLAG_SINGLE_KEY) != 0) return (sc->sc_ekey); /* We switch key every 2^G_ELI_KEY_SHIFT blocks. */ keyno = (offset >> G_ELI_KEY_SHIFT) / blocksize; KASSERT(keyno < sc->sc_ekeys_total, ("%s: keyno=%ju >= sc_ekeys_total=%ju", __func__, (uintmax_t)keyno, (uintmax_t)sc->sc_ekeys_total)); keysearch.gek_keyno = keyno; if (sc->sc_ekeys_total == sc->sc_ekeys_allocated) { /* We have all the keys, so avoid some overhead. */ key = RB_FIND(g_eli_key_tree, &sc->sc_ekeys_tree, &keysearch); KASSERT(key != NULL, ("No key %ju found.", (uintmax_t)keyno)); KASSERT(key->gek_magic == G_ELI_KEY_MAGIC, ("Invalid key magic.")); return (key->gek_key); } mtx_lock(&sc->sc_ekeys_lock); key = RB_FIND(g_eli_key_tree, &sc->sc_ekeys_tree, &keysearch); if (key != NULL) { g_eli_key_cache_hits++; TAILQ_REMOVE(&sc->sc_ekeys_queue, key, gek_next); TAILQ_INSERT_TAIL(&sc->sc_ekeys_queue, key, gek_next); } else { /* * No key in cache, find the least recently unreferenced key * or allocate one if we haven't reached our limit yet. */ if (sc->sc_ekeys_allocated < g_eli_key_cache_limit) { key = g_eli_key_allocate(sc, keyno); } else { g_eli_key_cache_misses++; key = g_eli_key_find_last(sc); if (key != NULL) { g_eli_key_replace(sc, key, keyno); } else { /* All keys are referenced? Allocate one. */ key = g_eli_key_allocate(sc, keyno); } } } key->gek_count++; mtx_unlock(&sc->sc_ekeys_lock); KASSERT(key->gek_magic == G_ELI_KEY_MAGIC, ("Invalid key magic.")); return (key->gek_key); } void g_eli_key_drop(struct g_eli_softc *sc, uint8_t *rawkey) { struct g_eli_key *key = (struct g_eli_key *)rawkey; if ((sc->sc_flags & G_ELI_FLAG_SINGLE_KEY) != 0) return; KASSERT(key->gek_magic == G_ELI_KEY_MAGIC, ("Invalid key magic.")); if (sc->sc_ekeys_total == sc->sc_ekeys_allocated) return; mtx_lock(&sc->sc_ekeys_lock); KASSERT(key->gek_count > 0, ("key->gek_count=%d", key->gek_count)); key->gek_count--; while (sc->sc_ekeys_allocated > g_eli_key_cache_limit) { key = g_eli_key_find_last(sc); if (key == NULL) break; g_eli_key_remove(sc, key); } mtx_unlock(&sc->sc_ekeys_lock); } #endif /* _KERNEL */ Index: head/sys/geom/geom_dev.c =================================================================== --- head/sys/geom/geom_dev.c (revision 362623) +++ head/sys/geom/geom_dev.c (revision 362624) @@ -1,899 +1,896 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct g_dev_softc { struct mtx sc_mtx; struct cdev *sc_dev; struct cdev *sc_alias; int sc_open; u_int sc_active; #define SC_A_DESTROY (1 << 31) #define SC_A_OPEN (1 << 30) #define SC_A_ACTIVE (SC_A_OPEN - 1) }; static d_open_t g_dev_open; static d_close_t g_dev_close; static d_strategy_t g_dev_strategy; static d_ioctl_t g_dev_ioctl; static struct cdevsw g_dev_cdevsw = { .d_version = D_VERSION, .d_open = g_dev_open, .d_close = g_dev_close, .d_read = physread, .d_write = physwrite, .d_ioctl = g_dev_ioctl, .d_strategy = g_dev_strategy, .d_name = "g_dev", .d_flags = D_DISK | D_TRACKCLOSE, }; static g_init_t g_dev_init; static g_fini_t g_dev_fini; static g_taste_t g_dev_taste; static g_orphan_t g_dev_orphan; static g_attrchanged_t g_dev_attrchanged; static g_resize_t g_dev_resize; static struct g_class g_dev_class = { .name = "DEV", .version = G_VERSION, .init = g_dev_init, .fini = g_dev_fini, .taste = g_dev_taste, .orphan = g_dev_orphan, .attrchanged = g_dev_attrchanged, .resize = g_dev_resize }; /* * We target 262144 (8 x 32768) sectors by default as this significantly * increases the throughput on commonly used SSD's with a marginal * increase in non-interruptible request latency. */ static uint64_t g_dev_del_max_sectors = 262144; SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, dev, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "GEOM_DEV stuff"); SYSCTL_QUAD(_kern_geom_dev, OID_AUTO, delete_max_sectors, CTLFLAG_RW, &g_dev_del_max_sectors, 0, "Maximum number of sectors in a single " "delete request sent to the provider. Larger requests are chunked " "so they can be interrupted. (0 = disable chunking)"); static char *dumpdev = NULL; static void g_dev_init(struct g_class *mp) { dumpdev = kern_getenv("dumpdev"); } static void g_dev_fini(struct g_class *mp) { freeenv(dumpdev); dumpdev = NULL; } static int g_dev_setdumpdev(struct cdev *dev, struct diocskerneldump_arg *kda) { struct g_kerneldump kd; struct g_consumer *cp; int error, len; MPASS(dev != NULL && kda != NULL); MPASS(kda->kda_index != KDA_REMOVE); cp = dev->si_drv2; len = sizeof(kd); memset(&kd, 0, len); kd.offset = 0; kd.length = OFF_MAX; error = g_io_getattr("GEOM::kerneldump", cp, &len, &kd); if (error != 0) return (error); error = dumper_insert(&kd.di, devtoname(dev), kda); if (error == 0) dev->si_flags |= SI_DUMPDEV; return (error); } static int init_dumpdev(struct cdev *dev) { struct diocskerneldump_arg kda; struct g_consumer *cp; const char *devprefix = "/dev/", *devname; int error; size_t len; bzero(&kda, sizeof(kda)); kda.kda_index = KDA_APPEND; if (dumpdev == NULL) return (0); len = strlen(devprefix); devname = devtoname(dev); if (strcmp(devname, dumpdev) != 0 && (strncmp(dumpdev, devprefix, len) != 0 || strcmp(devname, dumpdev + len) != 0)) return (0); cp = (struct g_consumer *)dev->si_drv2; error = g_access(cp, 1, 0, 0); if (error != 0) return (error); error = g_dev_setdumpdev(dev, &kda); if (error == 0) { freeenv(dumpdev); dumpdev = NULL; } (void)g_access(cp, -1, 0, 0); return (error); } static void g_dev_destroy(void *arg, int flags __unused) { struct g_consumer *cp; struct g_geom *gp; struct g_dev_softc *sc; char buf[SPECNAMELEN + 6]; g_topology_assert(); cp = arg; gp = cp->geom; sc = cp->private; g_trace(G_T_TOPOLOGY, "g_dev_destroy(%p(%s))", cp, gp->name); snprintf(buf, sizeof(buf), "cdev=%s", gp->name); devctl_notify_f("GEOM", "DEV", "DESTROY", buf, M_WAITOK); if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); mtx_destroy(&sc->sc_mtx); g_free(sc); } void g_dev_print(void) { struct g_geom *gp; char const *p = ""; LIST_FOREACH(gp, &g_dev_class.geom, geom) { printf("%s%s", p, gp->name); p = " "; } printf("\n"); } static void g_dev_set_physpath(struct g_consumer *cp) { struct g_dev_softc *sc; char *physpath; int error, physpath_len; if (g_access(cp, 1, 0, 0) != 0) return; sc = cp->private; physpath_len = MAXPATHLEN; physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO); error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath); g_access(cp, -1, 0, 0); if (error == 0 && strlen(physpath) != 0) { struct cdev *dev, *old_alias_dev; struct cdev **alias_devp; dev = sc->sc_dev; old_alias_dev = sc->sc_alias; alias_devp = (struct cdev **)&sc->sc_alias; make_dev_physpath_alias(MAKEDEV_WAITOK, alias_devp, dev, old_alias_dev, physpath); } else if (sc->sc_alias) { destroy_dev((struct cdev *)sc->sc_alias); sc->sc_alias = NULL; } g_free(physpath); } static void g_dev_set_media(struct g_consumer *cp) { struct g_dev_softc *sc; struct cdev *dev; char buf[SPECNAMELEN + 6]; sc = cp->private; dev = sc->sc_dev; snprintf(buf, sizeof(buf), "cdev=%s", dev->si_name); devctl_notify_f("DEVFS", "CDEV", "MEDIACHANGE", buf, M_WAITOK); devctl_notify_f("GEOM", "DEV", "MEDIACHANGE", buf, M_WAITOK); dev = sc->sc_alias; if (dev != NULL) { snprintf(buf, sizeof(buf), "cdev=%s", dev->si_name); devctl_notify_f("DEVFS", "CDEV", "MEDIACHANGE", buf, M_WAITOK); devctl_notify_f("GEOM", "DEV", "MEDIACHANGE", buf, M_WAITOK); } } static void g_dev_attrchanged(struct g_consumer *cp, const char *attr) { if (strcmp(attr, "GEOM::media") == 0) { g_dev_set_media(cp); return; } if (strcmp(attr, "GEOM::physpath") == 0) { g_dev_set_physpath(cp); return; } } static void g_dev_resize(struct g_consumer *cp) { char buf[SPECNAMELEN + 6]; snprintf(buf, sizeof(buf), "cdev=%s", cp->provider->name); devctl_notify_f("GEOM", "DEV", "SIZECHANGE", buf, M_WAITOK); } struct g_provider * g_dev_getprovider(struct cdev *dev) { struct g_consumer *cp; g_topology_assert(); if (dev == NULL) return (NULL); if (dev->si_devsw != &g_dev_cdevsw) return (NULL); cp = dev->si_drv2; return (cp->provider); } static struct g_geom * g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused) { struct g_geom *gp; struct g_geom_alias *gap; struct g_consumer *cp; struct g_dev_softc *sc; int error; struct cdev *dev, *adev; char buf[SPECNAMELEN + 6]; struct make_dev_args args; g_trace(G_T_TOPOLOGY, "dev_taste(%s,%s)", mp->name, pp->name); g_topology_assert(); gp = g_new_geomf(mp, "%s", pp->name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->sc_mtx, "g_dev", NULL, MTX_DEF); cp = g_new_consumer(gp); cp->private = sc; cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); KASSERT(error == 0, ("g_dev_taste(%s) failed to g_attach, err=%d", pp->name, error)); make_dev_args_init(&args); args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; args.mda_devsw = &g_dev_cdevsw; args.mda_cr = NULL; args.mda_uid = UID_ROOT; args.mda_gid = GID_OPERATOR; args.mda_mode = 0640; args.mda_si_drv1 = sc; args.mda_si_drv2 = cp; error = make_dev_s(&args, &sc->sc_dev, "%s", gp->name); if (error != 0) { printf("%s: make_dev_p() failed (gp->name=%s, error=%d)\n", __func__, gp->name, error); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); mtx_destroy(&sc->sc_mtx); g_free(sc); return (NULL); } dev = sc->sc_dev; dev->si_flags |= SI_UNMAPPED; dev->si_iosize_max = MAXPHYS; error = init_dumpdev(dev); if (error != 0) printf("%s: init_dumpdev() failed (gp->name=%s, error=%d)\n", __func__, gp->name, error); g_dev_attrchanged(cp, "GEOM::physpath"); snprintf(buf, sizeof(buf), "cdev=%s", gp->name); devctl_notify_f("GEOM", "DEV", "CREATE", buf, M_WAITOK); /* * Now add all the aliases for this drive */ LIST_FOREACH(gap, &pp->aliases, ga_next) { error = make_dev_alias_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &adev, dev, "%s", gap->ga_alias); if (error) { printf("%s: make_dev_alias_p() failed (name=%s, error=%d)\n", __func__, gap->ga_alias, error); continue; } snprintf(buf, sizeof(buf), "cdev=%s", gap->ga_alias); devctl_notify_f("GEOM", "DEV", "CREATE", buf, M_WAITOK); } return (gp); } static int g_dev_open(struct cdev *dev, int flags, int fmt, struct thread *td) { struct g_consumer *cp; struct g_dev_softc *sc; int error, r, w, e; cp = dev->si_drv2; g_trace(G_T_ACCESS, "g_dev_open(%s, %d, %d, %p)", cp->geom->name, flags, fmt, td); r = flags & FREAD ? 1 : 0; w = flags & FWRITE ? 1 : 0; #ifdef notyet e = flags & O_EXCL ? 1 : 0; #else e = 0; #endif /* * This happens on attempt to open a device node with O_EXEC. */ if (r + w + e == 0) return (EINVAL); if (w) { /* * When running in very secure mode, do not allow * opens for writing of any disks. */ error = securelevel_ge(td->td_ucred, 2); if (error) return (error); } g_topology_lock(); error = g_access(cp, r, w, e); g_topology_unlock(); if (error == 0) { sc = dev->si_drv1; mtx_lock(&sc->sc_mtx); if (sc->sc_open == 0 && (sc->sc_active & SC_A_ACTIVE) != 0) wakeup(&sc->sc_active); sc->sc_open += r + w + e; if (sc->sc_open == 0) atomic_clear_int(&sc->sc_active, SC_A_OPEN); else atomic_set_int(&sc->sc_active, SC_A_OPEN); mtx_unlock(&sc->sc_mtx); } return (error); } static int g_dev_close(struct cdev *dev, int flags, int fmt, struct thread *td) { struct g_consumer *cp; struct g_dev_softc *sc; int error, r, w, e; cp = dev->si_drv2; g_trace(G_T_ACCESS, "g_dev_close(%s, %d, %d, %p)", cp->geom->name, flags, fmt, td); r = flags & FREAD ? -1 : 0; w = flags & FWRITE ? -1 : 0; #ifdef notyet e = flags & O_EXCL ? -1 : 0; #else e = 0; #endif /* * The vgonel(9) - caused by eg. forced unmount of devfs - calls * VOP_CLOSE(9) on devfs vnode without any FREAD or FWRITE flags, * which would result in zero deltas, which in turn would cause * panic in g_access(9). * * Note that we cannot zero the counters (ie. do "r = cp->acr" * etc) instead, because the consumer might be opened in another * devfs instance. */ if (r + w + e == 0) return (EINVAL); sc = dev->si_drv1; mtx_lock(&sc->sc_mtx); sc->sc_open += r + w + e; if (sc->sc_open == 0) atomic_clear_int(&sc->sc_active, SC_A_OPEN); else atomic_set_int(&sc->sc_active, SC_A_OPEN); while (sc->sc_open == 0 && (sc->sc_active & SC_A_ACTIVE) != 0) msleep(&sc->sc_active, &sc->sc_mtx, 0, "g_dev_close", hz / 10); mtx_unlock(&sc->sc_mtx); g_topology_lock(); error = g_access(cp, r, w, e); g_topology_unlock(); return (error); } static int g_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { struct g_consumer *cp; struct g_provider *pp; off_t offset, length, chunk, odd; int i, error; #ifdef COMPAT_FREEBSD12 struct diocskerneldump_arg kda_copy; #endif cp = dev->si_drv2; pp = cp->provider; /* If consumer or provider is dying, don't disturb. */ if (cp->flags & G_CF_ORPHAN) return (ENXIO); if (pp->error) return (pp->error); error = 0; KASSERT(cp->acr || cp->acw, ("Consumer with zero access count in g_dev_ioctl")); i = IOCPARM_LEN(cmd); switch (cmd) { case DIOCGSECTORSIZE: *(u_int *)data = pp->sectorsize; if (*(u_int *)data == 0) error = ENOENT; break; case DIOCGMEDIASIZE: *(off_t *)data = pp->mediasize; if (*(off_t *)data == 0) error = ENOENT; break; case DIOCGFWSECTORS: error = g_io_getattr("GEOM::fwsectors", cp, &i, data); if (error == 0 && *(u_int *)data == 0) error = ENOENT; break; case DIOCGFWHEADS: error = g_io_getattr("GEOM::fwheads", cp, &i, data); if (error == 0 && *(u_int *)data == 0) error = ENOENT; break; case DIOCGFRONTSTUFF: error = g_io_getattr("GEOM::frontstuff", cp, &i, data); break; #ifdef COMPAT_FREEBSD11 case DIOCSKERNELDUMP_FREEBSD11: { struct diocskerneldump_arg kda; gone_in(13, "FreeBSD 11.x ABI compat"); bzero(&kda, sizeof(kda)); kda.kda_encryption = KERNELDUMP_ENC_NONE; kda.kda_index = (*(u_int *)data ? 0 : KDA_REMOVE_ALL); if (kda.kda_index == KDA_REMOVE_ALL) error = dumper_remove(devtoname(dev), &kda); else error = g_dev_setdumpdev(dev, &kda); break; } #endif #ifdef COMPAT_FREEBSD12 case DIOCSKERNELDUMP_FREEBSD12: { struct diocskerneldump_arg_freebsd12 *kda12; gone_in(14, "FreeBSD 12.x ABI compat"); kda12 = (void *)data; memcpy(&kda_copy, kda12, sizeof(kda_copy)); kda_copy.kda_index = (kda12->kda12_enable ? 0 : KDA_REMOVE_ALL); explicit_bzero(kda12, sizeof(*kda12)); /* Kludge to pass kda_copy to kda in fallthrough. */ data = (void *)&kda_copy; } /* FALLTHROUGH */ #endif case DIOCSKERNELDUMP: { struct diocskerneldump_arg *kda; uint8_t *encryptedkey; kda = (struct diocskerneldump_arg *)data; if (kda->kda_index == KDA_REMOVE_ALL || kda->kda_index == KDA_REMOVE_DEV || kda->kda_index == KDA_REMOVE) { error = dumper_remove(devtoname(dev), kda); explicit_bzero(kda, sizeof(*kda)); break; } if (kda->kda_encryption != KERNELDUMP_ENC_NONE) { if (kda->kda_encryptedkeysize == 0 || kda->kda_encryptedkeysize > KERNELDUMP_ENCKEY_MAX_SIZE) { explicit_bzero(kda, sizeof(*kda)); return (EINVAL); } encryptedkey = malloc(kda->kda_encryptedkeysize, M_TEMP, M_WAITOK); error = copyin(kda->kda_encryptedkey, encryptedkey, kda->kda_encryptedkeysize); } else { encryptedkey = NULL; } if (error == 0) { kda->kda_encryptedkey = encryptedkey; error = g_dev_setdumpdev(dev, kda); } - if (encryptedkey != NULL) { - explicit_bzero(encryptedkey, kda->kda_encryptedkeysize); - free(encryptedkey, M_TEMP); - } + zfree(encryptedkey, M_TEMP); explicit_bzero(kda, sizeof(*kda)); break; } case DIOCGFLUSH: error = g_io_flush(cp); break; case DIOCGDELETE: offset = ((off_t *)data)[0]; length = ((off_t *)data)[1]; if ((offset % pp->sectorsize) != 0 || (length % pp->sectorsize) != 0 || length <= 0) { printf("%s: offset=%jd length=%jd\n", __func__, offset, length); error = EINVAL; break; } if ((pp->mediasize > 0) && (offset >= pp->mediasize)) { /* * Catch out-of-bounds requests here. The problem is * that due to historical GEOM I/O implementation * peculatities, g_delete_data() would always return * success for requests starting just the next byte * after providers media boundary. Condition check on * non-zero media size, since that condition would * (most likely) cause ENXIO instead. */ error = EIO; break; } while (length > 0) { chunk = length; if (g_dev_del_max_sectors != 0 && chunk > g_dev_del_max_sectors * pp->sectorsize) { chunk = g_dev_del_max_sectors * pp->sectorsize; if (pp->stripesize > 0) { odd = (offset + chunk + pp->stripeoffset) % pp->stripesize; if (chunk > odd) chunk -= odd; } } error = g_delete_data(cp, offset, chunk); length -= chunk; offset += chunk; if (error) break; /* * Since the request size can be large, the service * time can be is likewise. We make this ioctl * interruptible by checking for signals for each bio. */ if (SIGPENDING(td)) break; } break; case DIOCGIDENT: error = g_io_getattr("GEOM::ident", cp, &i, data); break; case DIOCGPROVIDERNAME: strlcpy(data, pp->name, i); break; case DIOCGSTRIPESIZE: *(off_t *)data = pp->stripesize; break; case DIOCGSTRIPEOFFSET: *(off_t *)data = pp->stripeoffset; break; case DIOCGPHYSPATH: error = g_io_getattr("GEOM::physpath", cp, &i, data); if (error == 0 && *(char *)data == '\0') error = ENOENT; break; case DIOCGATTR: { struct diocgattr_arg *arg = (struct diocgattr_arg *)data; if (arg->len > sizeof(arg->value)) { error = EINVAL; break; } error = g_io_getattr(arg->name, cp, &arg->len, &arg->value); break; } case DIOCZONECMD: { struct disk_zone_args *zone_args =(struct disk_zone_args *)data; struct disk_zone_rep_entry *new_entries, *old_entries; struct disk_zone_report *rep; size_t alloc_size; old_entries = NULL; new_entries = NULL; rep = NULL; alloc_size = 0; if (zone_args->zone_cmd == DISK_ZONE_REPORT_ZONES) { rep = &zone_args->zone_params.report; #define MAXENTRIES (MAXPHYS / sizeof(struct disk_zone_rep_entry)) if (rep->entries_allocated > MAXENTRIES) rep->entries_allocated = MAXENTRIES; alloc_size = rep->entries_allocated * sizeof(struct disk_zone_rep_entry); if (alloc_size != 0) new_entries = g_malloc(alloc_size, M_WAITOK| M_ZERO); old_entries = rep->entries; rep->entries = new_entries; } error = g_io_zonecmd(zone_args, cp); if (zone_args->zone_cmd == DISK_ZONE_REPORT_ZONES && alloc_size != 0 && error == 0) error = copyout(new_entries, old_entries, alloc_size); if (old_entries != NULL && rep != NULL) rep->entries = old_entries; if (new_entries != NULL) g_free(new_entries); break; } default: if (pp->geom->ioctl != NULL) { error = pp->geom->ioctl(pp, cmd, data, fflag, td); } else { error = ENOIOCTL; } } return (error); } static void g_dev_done(struct bio *bp2) { struct g_consumer *cp; struct g_dev_softc *sc; struct bio *bp; int active; cp = bp2->bio_from; sc = cp->private; bp = bp2->bio_parent; bp->bio_error = bp2->bio_error; bp->bio_completed = bp2->bio_completed; bp->bio_resid = bp->bio_length - bp2->bio_completed; if (bp2->bio_cmd == BIO_ZONE) bcopy(&bp2->bio_zone, &bp->bio_zone, sizeof(bp->bio_zone)); if (bp2->bio_error != 0) { g_trace(G_T_BIO, "g_dev_done(%p) had error %d", bp2, bp2->bio_error); bp->bio_flags |= BIO_ERROR; } else { g_trace(G_T_BIO, "g_dev_done(%p/%p) resid %ld completed %jd", bp2, bp, bp2->bio_resid, (intmax_t)bp2->bio_completed); } g_destroy_bio(bp2); active = atomic_fetchadd_int(&sc->sc_active, -1) - 1; if ((active & SC_A_ACTIVE) == 0) { if ((active & SC_A_OPEN) == 0) wakeup(&sc->sc_active); if (active & SC_A_DESTROY) g_post_event(g_dev_destroy, cp, M_NOWAIT, NULL); } biodone(bp); } static void g_dev_strategy(struct bio *bp) { struct g_consumer *cp; struct bio *bp2; struct cdev *dev; struct g_dev_softc *sc; KASSERT(bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE || bp->bio_cmd == BIO_FLUSH || bp->bio_cmd == BIO_ZONE, ("Wrong bio_cmd bio=%p cmd=%d", bp, bp->bio_cmd)); dev = bp->bio_dev; cp = dev->si_drv2; KASSERT(cp->acr || cp->acw, ("Consumer with zero access count in g_dev_strategy")); biotrack(bp, __func__); #ifdef INVARIANTS if ((bp->bio_offset % cp->provider->sectorsize) != 0 || (bp->bio_bcount % cp->provider->sectorsize) != 0) { bp->bio_resid = bp->bio_bcount; biofinish(bp, NULL, EINVAL); return; } #endif sc = dev->si_drv1; KASSERT(sc->sc_open > 0, ("Closed device in g_dev_strategy")); atomic_add_int(&sc->sc_active, 1); for (;;) { /* * XXX: This is not an ideal solution, but I believe it to * XXX: deadlock safely, all things considered. */ bp2 = g_clone_bio(bp); if (bp2 != NULL) break; pause("gdstrat", hz / 10); } KASSERT(bp2 != NULL, ("XXX: ENOMEM in a bad place")); bp2->bio_done = g_dev_done; g_trace(G_T_BIO, "g_dev_strategy(%p/%p) offset %jd length %jd data %p cmd %d", bp, bp2, (intmax_t)bp->bio_offset, (intmax_t)bp2->bio_length, bp2->bio_data, bp2->bio_cmd); g_io_request(bp2, cp); KASSERT(cp->acr || cp->acw, ("g_dev_strategy raced with g_dev_close and lost")); } /* * g_dev_callback() * * Called by devfs when asynchronous device destruction is completed. * - Mark that we have no attached device any more. * - If there are no outstanding requests, schedule geom destruction. * Otherwise destruction will be scheduled later by g_dev_done(). */ static void g_dev_callback(void *arg) { struct g_consumer *cp; struct g_dev_softc *sc; int active; cp = arg; sc = cp->private; g_trace(G_T_TOPOLOGY, "g_dev_callback(%p(%s))", cp, cp->geom->name); sc->sc_dev = NULL; sc->sc_alias = NULL; active = atomic_fetchadd_int(&sc->sc_active, SC_A_DESTROY); if ((active & SC_A_ACTIVE) == 0) g_post_event(g_dev_destroy, cp, M_WAITOK, NULL); } /* * g_dev_orphan() * * Called from below when the provider orphaned us. * - Clear any dump settings. * - Request asynchronous device destruction to prevent any more requests * from coming in. The provider is already marked with an error, so * anything which comes in the interim will be returned immediately. */ static void g_dev_orphan(struct g_consumer *cp) { struct cdev *dev; struct g_dev_softc *sc; g_topology_assert(); sc = cp->private; dev = sc->sc_dev; g_trace(G_T_TOPOLOGY, "g_dev_orphan(%p(%s))", cp, cp->geom->name); /* Reset any dump-area set on this device */ if (dev->si_flags & SI_DUMPDEV) { struct diocskerneldump_arg kda; bzero(&kda, sizeof(kda)); kda.kda_index = KDA_REMOVE_DEV; (void)dumper_remove(devtoname(dev), &kda); } /* Destroy the struct cdev *so we get no more requests */ delist_dev(dev); destroy_dev_sched_cb(dev, g_dev_callback, cp); } DECLARE_GEOM_CLASS(g_dev_class, g_dev); Index: head/sys/kern/kern_environment.c =================================================================== --- head/sys/kern/kern_environment.c (revision 362623) +++ head/sys/kern/kern_environment.c (revision 362624) @@ -1,1008 +1,1007 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * The unified bootloader passes us a pointer to a preserved copy of * bootstrap/kernel environment variables. We convert them to a * dynamic array of strings later when the VM subsystem is up. * * We make these available through the kenv(2) syscall for userland * and through kern_getenv()/freeenv() kern_setenv() kern_unsetenv() testenv() for * the kernel. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static char *_getenv_dynamic_locked(const char *name, int *idx); static char *_getenv_dynamic(const char *name, int *idx); static MALLOC_DEFINE(M_KENV, "kenv", "kernel environment"); #define KENV_SIZE 512 /* Maximum number of environment strings */ static uma_zone_t kenv_zone; static int kenv_mvallen = KENV_MVALLEN; /* pointer to the config-generated static environment */ char *kern_envp; /* pointer to the md-static environment */ char *md_envp; static int md_env_len; static int md_env_pos; static char *kernenv_next(char *); /* dynamic environment variables */ char **kenvp; struct mtx kenv_lock; /* * No need to protect this with a mutex since SYSINITS are single threaded. */ bool dynamic_kenv; #define KENV_CHECK if (!dynamic_kenv) \ panic("%s: called before SI_SUB_KMEM", __func__) static char *getenv_string_buffer(const char *); int sys_kenv(td, uap) struct thread *td; struct kenv_args /* { int what; const char *name; char *value; int len; } */ *uap; { char *name, *value, *buffer = NULL; size_t len, done, needed, buflen; int error, i; KASSERT(dynamic_kenv, ("kenv: dynamic_kenv = false")); error = 0; if (uap->what == KENV_DUMP) { #ifdef MAC error = mac_kenv_check_dump(td->td_ucred); if (error) return (error); #endif done = needed = 0; buflen = uap->len; if (buflen > KENV_SIZE * (KENV_MNAMELEN + kenv_mvallen + 2)) buflen = KENV_SIZE * (KENV_MNAMELEN + kenv_mvallen + 2); if (uap->len > 0 && uap->value != NULL) buffer = malloc(buflen, M_TEMP, M_WAITOK|M_ZERO); mtx_lock(&kenv_lock); for (i = 0; kenvp[i] != NULL; i++) { len = strlen(kenvp[i]) + 1; needed += len; len = min(len, buflen - done); /* * If called with a NULL or insufficiently large * buffer, just keep computing the required size. */ if (uap->value != NULL && buffer != NULL && len > 0) { bcopy(kenvp[i], buffer + done, len); done += len; } } mtx_unlock(&kenv_lock); if (buffer != NULL) { error = copyout(buffer, uap->value, done); free(buffer, M_TEMP); } td->td_retval[0] = ((done == needed) ? 0 : needed); return (error); } switch (uap->what) { case KENV_SET: error = priv_check(td, PRIV_KENV_SET); if (error) return (error); break; case KENV_UNSET: error = priv_check(td, PRIV_KENV_UNSET); if (error) return (error); break; } name = malloc(KENV_MNAMELEN + 1, M_TEMP, M_WAITOK); error = copyinstr(uap->name, name, KENV_MNAMELEN + 1, NULL); if (error) goto done; switch (uap->what) { case KENV_GET: #ifdef MAC error = mac_kenv_check_get(td->td_ucred, name); if (error) goto done; #endif value = kern_getenv(name); if (value == NULL) { error = ENOENT; goto done; } len = strlen(value) + 1; if (len > uap->len) len = uap->len; error = copyout(value, uap->value, len); freeenv(value); if (error) goto done; td->td_retval[0] = len; break; case KENV_SET: len = uap->len; if (len < 1) { error = EINVAL; goto done; } if (len > kenv_mvallen + 1) len = kenv_mvallen + 1; value = malloc(len, M_TEMP, M_WAITOK); error = copyinstr(uap->value, value, len, NULL); if (error) { free(value, M_TEMP); goto done; } #ifdef MAC error = mac_kenv_check_set(td->td_ucred, name, value); if (error == 0) #endif kern_setenv(name, value); free(value, M_TEMP); break; case KENV_UNSET: #ifdef MAC error = mac_kenv_check_unset(td->td_ucred, name); if (error) goto done; #endif error = kern_unsetenv(name); if (error) error = ENOENT; break; default: error = EINVAL; break; } done: free(name, M_TEMP); return (error); } /* * Populate the initial kernel environment. * * This is called very early in MD startup, either to provide a copy of the * environment obtained from a boot loader, or to provide an empty buffer into * which MD code can store an initial environment using kern_setenv() calls. * * kern_envp is set to the static_env generated by config(8). This implements * the env keyword described in config(5). * * If len is non-zero, the caller is providing an empty buffer. The caller will * subsequently use kern_setenv() to add up to len bytes of initial environment * before the dynamic environment is available. * * If len is zero, the caller is providing a pre-loaded buffer containing * environment strings. Additional strings cannot be added until the dynamic * environment is available. The memory pointed to must remain stable at least * until sysinit runs init_dynamic_kenv() and preferably until after SI_SUB_KMEM * is finished so that subr_hints routines may continue to use it until the * environments have been fully merged at the end of the pass. If no initial * environment is available from the boot loader, passing a NULL pointer allows * the static_env to be installed if it is configured. In this case, any call * to kern_setenv() prior to the setup of the dynamic environment will result in * a panic. */ void init_static_kenv(char *buf, size_t len) { char *eval; KASSERT(!dynamic_kenv, ("kenv: dynamic_kenv already initialized")); /* * Suitably sized means it must be able to hold at least one empty * variable, otherwise things go belly up if a kern_getenv call is * made without a prior call to kern_setenv as we have a malformed * environment. */ KASSERT(len == 0 || len >= 2, ("kenv: static env must be initialized or suitably sized")); KASSERT(len == 0 || (*buf == '\0' && *(buf + 1) == '\0'), ("kenv: sized buffer must be initially empty")); /* * We may be called twice, with the second call needed to relocate * md_envp after enabling paging. md_envp is then garbage if it is * not null and the relocation will move it. Discard it so as to * not crash using its old value in our first call to kern_getenv(). * * The second call gives the same environment as the first except * in silly configurations where the static env disables itself. * * Other env calls don't handle possibly-garbage pointers, so must * not be made between enabling paging and calling here. */ md_envp = NULL; md_env_len = 0; md_env_pos = 0; /* * Give the static environment a chance to disable the loader(8) * environment first. This is done with loader_env.disabled=1. * * static_env and static_hints may both be disabled, but in slightly * different ways. For static_env, we just don't setup kern_envp and * it's as if a static env wasn't even provided. For static_hints, * we effectively zero out the buffer to stop the rest of the kernel * from being able to use it. * * We're intentionally setting this up so that static_hints.disabled may * be specified in either the MD env or the static env. This keeps us * consistent in our new world view. * * As a warning, the static environment may not be disabled in any way * if the static environment has disabled the loader environment. */ kern_envp = static_env; eval = kern_getenv("loader_env.disabled"); if (eval == NULL || strcmp(eval, "1") != 0) { md_envp = buf; md_env_len = len; md_env_pos = 0; eval = kern_getenv("static_env.disabled"); if (eval != NULL && strcmp(eval, "1") == 0) { kern_envp[0] = '\0'; kern_envp[1] = '\0'; } } eval = kern_getenv("static_hints.disabled"); if (eval != NULL && strcmp(eval, "1") == 0) { static_hints[0] = '\0'; static_hints[1] = '\0'; } } static void init_dynamic_kenv_from(char *init_env, int *curpos) { char *cp, *cpnext, *eqpos, *found; size_t len; int i; if (init_env && *init_env != '\0') { found = NULL; i = *curpos; for (cp = init_env; cp != NULL; cp = cpnext) { cpnext = kernenv_next(cp); len = strlen(cp) + 1; if (len > KENV_MNAMELEN + 1 + kenv_mvallen + 1) { printf( "WARNING: too long kenv string, ignoring %s\n", cp); goto sanitize; } eqpos = strchr(cp, '='); if (eqpos == NULL) { printf( "WARNING: malformed static env value, ignoring %s\n", cp); goto sanitize; } *eqpos = 0; /* * De-dupe the environment as we go. We don't add the * duplicated assignments because config(8) will flip * the order of the static environment around to make * kernel processing match the order of specification * in the kernel config. */ found = _getenv_dynamic_locked(cp, NULL); *eqpos = '='; if (found != NULL) goto sanitize; if (i > KENV_SIZE) { printf( "WARNING: too many kenv strings, ignoring %s\n", cp); goto sanitize; } kenvp[i] = malloc(len, M_KENV, M_WAITOK); strcpy(kenvp[i++], cp); sanitize: explicit_bzero(cp, len - 1); } *curpos = i; } } /* * Setup the dynamic kernel environment. */ static void init_dynamic_kenv(void *data __unused) { int dynamic_envpos; int size; TUNABLE_INT_FETCH("kenv_mvallen", &kenv_mvallen); size = KENV_MNAMELEN + 1 + kenv_mvallen + 1; kenv_zone = uma_zcreate("kenv", size, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); kenvp = malloc((KENV_SIZE + 1) * sizeof(char *), M_KENV, M_WAITOK | M_ZERO); dynamic_envpos = 0; init_dynamic_kenv_from(md_envp, &dynamic_envpos); init_dynamic_kenv_from(kern_envp, &dynamic_envpos); kenvp[dynamic_envpos] = NULL; mtx_init(&kenv_lock, "kernel environment", NULL, MTX_DEF); dynamic_kenv = true; } SYSINIT(kenv, SI_SUB_KMEM + 1, SI_ORDER_FIRST, init_dynamic_kenv, NULL); void freeenv(char *env) { if (dynamic_kenv && env != NULL) { explicit_bzero(env, strlen(env)); uma_zfree(kenv_zone, env); } } /* * Internal functions for string lookup. */ static char * _getenv_dynamic_locked(const char *name, int *idx) { char *cp; int len, i; len = strlen(name); for (cp = kenvp[0], i = 0; cp != NULL; cp = kenvp[++i]) { if ((strncmp(cp, name, len) == 0) && (cp[len] == '=')) { if (idx != NULL) *idx = i; return (cp + len + 1); } } return (NULL); } static char * _getenv_dynamic(const char *name, int *idx) { mtx_assert(&kenv_lock, MA_OWNED); return (_getenv_dynamic_locked(name, idx)); } static char * _getenv_static_from(char *chkenv, const char *name) { char *cp, *ep; int len; for (cp = chkenv; cp != NULL; cp = kernenv_next(cp)) { for (ep = cp; (*ep != '=') && (*ep != 0); ep++) ; if (*ep != '=') continue; len = ep - cp; ep++; if (!strncmp(name, cp, len) && name[len] == 0) return (ep); } return (NULL); } static char * _getenv_static(const char *name) { char *val; val = _getenv_static_from(md_envp, name); if (val != NULL) return (val); val = _getenv_static_from(kern_envp, name); if (val != NULL) return (val); return (NULL); } /* * Look up an environment variable by name. * Return a pointer to the string if found. * The pointer has to be freed with freeenv() * after use. */ char * kern_getenv(const char *name) { char *ret; if (dynamic_kenv) { ret = getenv_string_buffer(name); if (ret == NULL) { WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "getenv"); } } else ret = _getenv_static(name); return (ret); } /* * Test if an environment variable is defined. */ int testenv(const char *name) { char *cp; if (dynamic_kenv) { mtx_lock(&kenv_lock); cp = _getenv_dynamic(name, NULL); mtx_unlock(&kenv_lock); } else cp = _getenv_static(name); if (cp != NULL) return (1); return (0); } /* * Set an environment variable in the MD-static environment. This cannot * feasibly be done on config(8)-generated static environments as they don't * generally include space for extra variables. */ static int setenv_static(const char *name, const char *value) { int len; if (md_env_pos >= md_env_len) return (-1); /* Check space for x=y and two nuls */ len = strlen(name) + strlen(value); if (len + 3 < md_env_len - md_env_pos) { len = sprintf(&md_envp[md_env_pos], "%s=%s", name, value); md_env_pos += len+1; md_envp[md_env_pos] = '\0'; return (0); } else return (-1); } /* * Set an environment variable by name. */ int kern_setenv(const char *name, const char *value) { char *buf, *cp, *oldenv; int namelen, vallen, i; if (!dynamic_kenv && md_env_len > 0) return (setenv_static(name, value)); KENV_CHECK; namelen = strlen(name) + 1; if (namelen > KENV_MNAMELEN + 1) return (-1); vallen = strlen(value) + 1; if (vallen > kenv_mvallen + 1) return (-1); buf = malloc(namelen + vallen, M_KENV, M_WAITOK); sprintf(buf, "%s=%s", name, value); mtx_lock(&kenv_lock); cp = _getenv_dynamic(name, &i); if (cp != NULL) { oldenv = kenvp[i]; kenvp[i] = buf; mtx_unlock(&kenv_lock); free(oldenv, M_KENV); } else { /* We add the option if it wasn't found */ for (i = 0; (cp = kenvp[i]) != NULL; i++) ; /* Bounds checking */ if (i < 0 || i >= KENV_SIZE) { free(buf, M_KENV); mtx_unlock(&kenv_lock); return (-1); } kenvp[i] = buf; kenvp[i + 1] = NULL; mtx_unlock(&kenv_lock); } return (0); } /* * Unset an environment variable string. */ int kern_unsetenv(const char *name) { char *cp, *oldenv; int i, j; KENV_CHECK; mtx_lock(&kenv_lock); cp = _getenv_dynamic(name, &i); if (cp != NULL) { oldenv = kenvp[i]; for (j = i + 1; kenvp[j] != NULL; j++) kenvp[i++] = kenvp[j]; kenvp[i] = NULL; mtx_unlock(&kenv_lock); - explicit_bzero(oldenv, strlen(oldenv)); - free(oldenv, M_KENV); + zfree(oldenv, M_KENV); return (0); } mtx_unlock(&kenv_lock); return (-1); } /* * Return a buffer containing the string value from an environment variable */ static char * getenv_string_buffer(const char *name) { char *cp, *ret; int len; if (dynamic_kenv) { len = KENV_MNAMELEN + 1 + kenv_mvallen + 1; ret = uma_zalloc(kenv_zone, M_WAITOK | M_ZERO); mtx_lock(&kenv_lock); cp = _getenv_dynamic(name, NULL); if (cp != NULL) strlcpy(ret, cp, len); mtx_unlock(&kenv_lock); if (cp == NULL) { uma_zfree(kenv_zone, ret); ret = NULL; } } else ret = _getenv_static(name); return (ret); } /* * Return a string value from an environment variable. */ int getenv_string(const char *name, char *data, int size) { char *cp; if (dynamic_kenv) { mtx_lock(&kenv_lock); cp = _getenv_dynamic(name, NULL); if (cp != NULL) strlcpy(data, cp, size); mtx_unlock(&kenv_lock); } else { cp = _getenv_static(name); if (cp != NULL) strlcpy(data, cp, size); } return (cp != NULL); } /* * Return an array of integers at the given type size and signedness. */ int getenv_array(const char *name, void *pdata, int size, int *psize, int type_size, bool allow_signed) { uint8_t shift; int64_t value; int64_t old; char *buf; char *end; char *ptr; int n; int rc; if ((buf = getenv_string_buffer(name)) == NULL) return (0); rc = 0; /* assume failure */ /* get maximum number of elements */ size /= type_size; n = 0; for (ptr = buf; *ptr != 0; ) { value = strtoq(ptr, &end, 0); /* check if signed numbers are allowed */ if (value < 0 && !allow_signed) goto error; /* check for invalid value */ if (ptr == end) goto error; /* check for valid suffix */ switch (*end) { case 't': case 'T': shift = 40; end++; break; case 'g': case 'G': shift = 30; end++; break; case 'm': case 'M': shift = 20; end++; break; case 'k': case 'K': shift = 10; end++; break; case ' ': case '\t': case ',': case 0: shift = 0; break; default: /* garbage after numeric value */ goto error; } /* skip till next value, if any */ while (*end == '\t' || *end == ',' || *end == ' ') end++; /* update pointer */ ptr = end; /* apply shift */ old = value; value <<= shift; /* overflow check */ if ((value >> shift) != old) goto error; /* check for buffer overflow */ if (n >= size) goto error; /* store value according to type size */ switch (type_size) { case 1: if (allow_signed) { if (value < SCHAR_MIN || value > SCHAR_MAX) goto error; } else { if (value < 0 || value > UCHAR_MAX) goto error; } ((uint8_t *)pdata)[n] = (uint8_t)value; break; case 2: if (allow_signed) { if (value < SHRT_MIN || value > SHRT_MAX) goto error; } else { if (value < 0 || value > USHRT_MAX) goto error; } ((uint16_t *)pdata)[n] = (uint16_t)value; break; case 4: if (allow_signed) { if (value < INT_MIN || value > INT_MAX) goto error; } else { if (value > UINT_MAX) goto error; } ((uint32_t *)pdata)[n] = (uint32_t)value; break; case 8: ((uint64_t *)pdata)[n] = (uint64_t)value; break; default: goto error; } n++; } *psize = n * type_size; if (n != 0) rc = 1; /* success */ error: if (dynamic_kenv) uma_zfree(kenv_zone, buf); return (rc); } /* * Return an integer value from an environment variable. */ int getenv_int(const char *name, int *data) { quad_t tmp; int rval; rval = getenv_quad(name, &tmp); if (rval) *data = (int) tmp; return (rval); } /* * Return an unsigned integer value from an environment variable. */ int getenv_uint(const char *name, unsigned int *data) { quad_t tmp; int rval; rval = getenv_quad(name, &tmp); if (rval) *data = (unsigned int) tmp; return (rval); } /* * Return an int64_t value from an environment variable. */ int getenv_int64(const char *name, int64_t *data) { quad_t tmp; int64_t rval; rval = getenv_quad(name, &tmp); if (rval) *data = (int64_t) tmp; return (rval); } /* * Return an uint64_t value from an environment variable. */ int getenv_uint64(const char *name, uint64_t *data) { quad_t tmp; uint64_t rval; rval = getenv_quad(name, &tmp); if (rval) *data = (uint64_t) tmp; return (rval); } /* * Return a long value from an environment variable. */ int getenv_long(const char *name, long *data) { quad_t tmp; int rval; rval = getenv_quad(name, &tmp); if (rval) *data = (long) tmp; return (rval); } /* * Return an unsigned long value from an environment variable. */ int getenv_ulong(const char *name, unsigned long *data) { quad_t tmp; int rval; rval = getenv_quad(name, &tmp); if (rval) *data = (unsigned long) tmp; return (rval); } /* * Return a quad_t value from an environment variable. */ int getenv_quad(const char *name, quad_t *data) { char *value, *vtp; quad_t iv; value = getenv_string_buffer(name); if (value == NULL) return (0); iv = strtoq(value, &vtp, 0); if (vtp == value || (vtp[0] != '\0' && vtp[1] != '\0')) { freeenv(value); return (0); } switch (vtp[0]) { case 't': case 'T': iv *= 1024; /* FALLTHROUGH */ case 'g': case 'G': iv *= 1024; /* FALLTHROUGH */ case 'm': case 'M': iv *= 1024; /* FALLTHROUGH */ case 'k': case 'K': iv *= 1024; case '\0': break; default: freeenv(value); return (0); } freeenv(value); *data = iv; return (1); } /* * Find the next entry after the one which (cp) falls within, return a * pointer to its start or NULL if there are no more. */ static char * kernenv_next(char *cp) { if (cp != NULL) { while (*cp != 0) cp++; cp++; if (*cp == 0) cp = NULL; } return (cp); } void tunable_int_init(void *data) { struct tunable_int *d = (struct tunable_int *)data; TUNABLE_INT_FETCH(d->path, d->var); } void tunable_long_init(void *data) { struct tunable_long *d = (struct tunable_long *)data; TUNABLE_LONG_FETCH(d->path, d->var); } void tunable_ulong_init(void *data) { struct tunable_ulong *d = (struct tunable_ulong *)data; TUNABLE_ULONG_FETCH(d->path, d->var); } void tunable_int64_init(void *data) { struct tunable_int64 *d = (struct tunable_int64 *)data; TUNABLE_INT64_FETCH(d->path, d->var); } void tunable_uint64_init(void *data) { struct tunable_uint64 *d = (struct tunable_uint64 *)data; TUNABLE_UINT64_FETCH(d->path, d->var); } void tunable_quad_init(void *data) { struct tunable_quad *d = (struct tunable_quad *)data; TUNABLE_QUAD_FETCH(d->path, d->var); } void tunable_str_init(void *data) { struct tunable_str *d = (struct tunable_str *)data; TUNABLE_STR_FETCH(d->path, d->var, d->size); } Index: head/sys/kern/kern_shutdown.c =================================================================== --- head/sys/kern/kern_shutdown.c (revision 362623) +++ head/sys/kern/kern_shutdown.c (revision 362624) @@ -1,1761 +1,1750 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_ekcd.h" #include "opt_kdb.h" #include "opt_panic.h" #include "opt_printf.h" #include "opt_sched.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer"); #ifndef PANIC_REBOOT_WAIT_TIME #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ #endif static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME; SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN, &panic_reboot_wait_time, 0, "Seconds to wait before rebooting after a panic"); /* * Note that stdarg.h and the ANSI style va_start macro is used for both * ANSI and traditional C compilers. */ #include #ifdef KDB #ifdef KDB_UNATTENDED int debugger_on_panic = 0; #else int debugger_on_panic = 1; #endif SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RWTUN | CTLFLAG_SECURE, &debugger_on_panic, 0, "Run debugger on kernel panic"); int debugger_on_trap = 0; SYSCTL_INT(_debug, OID_AUTO, debugger_on_trap, CTLFLAG_RWTUN | CTLFLAG_SECURE, &debugger_on_trap, 0, "Run debugger on kernel trap before panic"); #ifdef KDB_TRACE static int trace_on_panic = 1; static bool trace_all_panics = true; #else static int trace_on_panic = 0; static bool trace_all_panics = false; #endif SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, CTLFLAG_RWTUN | CTLFLAG_SECURE, &trace_on_panic, 0, "Print stack trace on kernel panic"); SYSCTL_BOOL(_debug, OID_AUTO, trace_all_panics, CTLFLAG_RWTUN, &trace_all_panics, 0, "Print stack traces on secondary kernel panics"); #endif /* KDB */ static int sync_on_panic = 0; SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN, &sync_on_panic, 0, "Do a sync before rebooting from a panic"); static bool poweroff_on_panic = 0; SYSCTL_BOOL(_kern, OID_AUTO, poweroff_on_panic, CTLFLAG_RWTUN, &poweroff_on_panic, 0, "Do a power off instead of a reboot on a panic"); static bool powercycle_on_panic = 0; SYSCTL_BOOL(_kern, OID_AUTO, powercycle_on_panic, CTLFLAG_RWTUN, &powercycle_on_panic, 0, "Do a power cycle instead of a reboot on a panic"); static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Shutdown environment"); #ifndef DIAGNOSTIC static int show_busybufs; #else static int show_busybufs = 1; #endif SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW, &show_busybufs, 0, "Show busy buffers during shutdown"); int suspend_blocked = 0; SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW, &suspend_blocked, 0, "Block suspend due to a pending shutdown"); #ifdef EKCD FEATURE(ekcd, "Encrypted kernel crash dumps support"); MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data"); struct kerneldumpcrypto { uint8_t kdc_encryption; uint8_t kdc_iv[KERNELDUMP_IV_MAX_SIZE]; union { struct { keyInstance aes_ki; cipherInstance aes_ci; } u_aes; struct chacha_ctx u_chacha; } u; #define kdc_ki u.u_aes.aes_ki #define kdc_ci u.u_aes.aes_ci #define kdc_chacha u.u_chacha uint32_t kdc_dumpkeysize; struct kerneldumpkey kdc_dumpkey[]; }; #endif struct kerneldumpcomp { uint8_t kdc_format; struct compressor *kdc_stream; uint8_t *kdc_buf; size_t kdc_resid; }; static struct kerneldumpcomp *kerneldumpcomp_create(struct dumperinfo *di, uint8_t compression); static void kerneldumpcomp_destroy(struct dumperinfo *di); static int kerneldumpcomp_write_cb(void *base, size_t len, off_t off, void *arg); static int kerneldump_gzlevel = 6; SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN, &kerneldump_gzlevel, 0, "Kernel crash dump compression level"); /* * Variable panicstr contains argument to first call to panic; used as flag * to indicate that the kernel has already called panic. */ const char *panicstr; bool __read_frequently panicked; int __read_mostly dumping; /* system is dumping */ int rebooting; /* system is rebooting */ /* * Used to serialize between sysctl kern.shutdown.dumpdevname and list * modifications via ioctl. */ static struct mtx dumpconf_list_lk; MTX_SYSINIT(dumper_configs, &dumpconf_list_lk, "dumper config list", MTX_DEF); /* Our selected dumper(s). */ static TAILQ_HEAD(dumpconflist, dumperinfo) dumper_configs = TAILQ_HEAD_INITIALIZER(dumper_configs); /* Context information for dump-debuggers. */ static struct pcb dumppcb; /* Registers. */ lwpid_t dumptid; /* Thread ID. */ static struct cdevsw reroot_cdevsw = { .d_version = D_VERSION, .d_name = "reroot", }; static void poweroff_wait(void *, int); static void shutdown_halt(void *junk, int howto); static void shutdown_panic(void *junk, int howto); static void shutdown_reset(void *junk, int howto); static int kern_reroot(void); /* register various local shutdown events */ static void shutdown_conf(void *unused) { EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, SHUTDOWN_PRI_FIRST); EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, SHUTDOWN_PRI_LAST + 100); EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, SHUTDOWN_PRI_LAST + 100); EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, SHUTDOWN_PRI_LAST + 200); } SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); /* * The only reason this exists is to create the /dev/reroot/ directory, * used by reroot code in init(8) as a mountpoint for tmpfs. */ static void reroot_conf(void *unused) { int error; struct cdev *cdev; error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); if (error != 0) { printf("%s: failed to create device node, error %d", __func__, error); } } SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); /* * The system call that results in a reboot. */ /* ARGSUSED */ int sys_reboot(struct thread *td, struct reboot_args *uap) { int error; error = 0; #ifdef MAC error = mac_system_check_reboot(td->td_ucred, uap->opt); #endif if (error == 0) error = priv_check(td, PRIV_REBOOT); if (error == 0) { if (uap->opt & RB_REROOT) error = kern_reroot(); else kern_reboot(uap->opt); } return (error); } static void shutdown_nice_task_fn(void *arg, int pending __unused) { int howto; howto = (uintptr_t)arg; /* Send a signal to init(8) and have it shutdown the world. */ PROC_LOCK(initproc); if (howto & RB_POWEROFF) kern_psignal(initproc, SIGUSR2); else if (howto & RB_POWERCYCLE) kern_psignal(initproc, SIGWINCH); else if (howto & RB_HALT) kern_psignal(initproc, SIGUSR1); else kern_psignal(initproc, SIGINT); PROC_UNLOCK(initproc); } static struct task shutdown_nice_task = TASK_INITIALIZER(0, &shutdown_nice_task_fn, NULL); /* * Called by events that want to shut down.. e.g on a PC */ void shutdown_nice(int howto) { if (initproc != NULL && !SCHEDULER_STOPPED()) { shutdown_nice_task.ta_context = (void *)(uintptr_t)howto; taskqueue_enqueue(taskqueue_fast, &shutdown_nice_task); } else { /* * No init(8) running, or scheduler would not allow it * to run, so simply reboot. */ kern_reboot(howto | RB_NOSYNC); } } static void print_uptime(void) { int f; struct timespec ts; getnanouptime(&ts); printf("Uptime: "); f = 0; if (ts.tv_sec >= 86400) { printf("%ldd", (long)ts.tv_sec / 86400); ts.tv_sec %= 86400; f = 1; } if (f || ts.tv_sec >= 3600) { printf("%ldh", (long)ts.tv_sec / 3600); ts.tv_sec %= 3600; f = 1; } if (f || ts.tv_sec >= 60) { printf("%ldm", (long)ts.tv_sec / 60); ts.tv_sec %= 60; f = 1; } printf("%lds\n", (long)ts.tv_sec); } int doadump(boolean_t textdump) { boolean_t coredump; int error; error = 0; if (dumping) return (EBUSY); if (TAILQ_EMPTY(&dumper_configs)) return (ENXIO); savectx(&dumppcb); dumptid = curthread->td_tid; dumping++; coredump = TRUE; #ifdef DDB if (textdump && textdump_pending) { coredump = FALSE; textdump_dumpsys(TAILQ_FIRST(&dumper_configs)); } #endif if (coredump) { struct dumperinfo *di; TAILQ_FOREACH(di, &dumper_configs, di_next) { error = dumpsys(di); if (error == 0) break; } } dumping--; return (error); } /* * Shutdown the system cleanly to prepare for reboot, halt, or power off. */ void kern_reboot(int howto) { static int once = 0; /* * Normal paths here don't hold Giant, but we can wind up here * unexpectedly with it held. Drop it now so we don't have to * drop and pick it up elsewhere. The paths it is locking will * never be returned to, and it is preferable to preclude * deadlock than to lock against code that won't ever * continue. */ while (mtx_owned(&Giant)) mtx_unlock(&Giant); #if defined(SMP) /* * Bind us to the first CPU so that all shutdown code runs there. Some * systems don't shutdown properly (i.e., ACPI power off) if we * run on another processor. */ if (!SCHEDULER_STOPPED()) { thread_lock(curthread); sched_bind(curthread, CPU_FIRST()); thread_unlock(curthread); KASSERT(PCPU_GET(cpuid) == CPU_FIRST(), ("boot: not running on cpu 0")); } #endif /* We're in the process of rebooting. */ rebooting = 1; /* We are out of the debugger now. */ kdb_active = 0; /* * Do any callouts that should be done BEFORE syncing the filesystems. */ EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); /* * Now sync filesystems */ if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) { once = 1; bufshutdown(show_busybufs); } print_uptime(); cngrab(); /* * Ok, now do things that assume all filesystem activity has * been completed. */ EVENTHANDLER_INVOKE(shutdown_post_sync, howto); if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) doadump(TRUE); /* Now that we're going to really halt the system... */ EVENTHANDLER_INVOKE(shutdown_final, howto); for(;;) ; /* safety against shutdown_reset not working */ /* NOTREACHED */ } /* * The system call that results in changing the rootfs. */ static int kern_reroot(void) { struct vnode *oldrootvnode, *vp; struct mount *mp, *devmp; int error; if (curproc != initproc) return (EPERM); /* * Mark the filesystem containing currently-running executable * (the temporary copy of init(8)) busy. */ vp = curproc->p_textvp; error = vn_lock(vp, LK_SHARED); if (error != 0) return (error); mp = vp->v_mount; error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) { vfs_ref(mp); VOP_UNLOCK(vp); error = vfs_busy(mp, 0); vn_lock(vp, LK_SHARED | LK_RETRY); vfs_rel(mp); if (error != 0) { VOP_UNLOCK(vp); return (ENOENT); } if (VN_IS_DOOMED(vp)) { VOP_UNLOCK(vp); vfs_unbusy(mp); return (ENOENT); } } VOP_UNLOCK(vp); /* * Remove the filesystem containing currently-running executable * from the mount list, to prevent it from being unmounted * by vfs_unmountall(), and to avoid confusing vfs_mountroot(). * * Also preserve /dev - forcibly unmounting it could cause driver * reinitialization. */ vfs_ref(rootdevmp); devmp = rootdevmp; rootdevmp = NULL; mtx_lock(&mountlist_mtx); TAILQ_REMOVE(&mountlist, mp, mnt_list); TAILQ_REMOVE(&mountlist, devmp, mnt_list); mtx_unlock(&mountlist_mtx); oldrootvnode = rootvnode; /* * Unmount everything except for the two filesystems preserved above. */ vfs_unmountall(); /* * Add /dev back; vfs_mountroot() will move it into its new place. */ mtx_lock(&mountlist_mtx); TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list); mtx_unlock(&mountlist_mtx); rootdevmp = devmp; vfs_rel(rootdevmp); /* * Mount the new rootfs. */ vfs_mountroot(); /* * Update all references to the old rootvnode. */ mountcheckdirs(oldrootvnode, rootvnode); /* * Add the temporary filesystem back and unbusy it. */ mtx_lock(&mountlist_mtx); TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); vfs_unbusy(mp); return (0); } /* * If the shutdown was a clean halt, behave accordingly. */ static void shutdown_halt(void *junk, int howto) { if (howto & RB_HALT) { printf("\n"); printf("The operating system has halted.\n"); printf("Please press any key to reboot.\n\n"); wdog_kern_pat(WD_TO_NEVER); switch (cngetc()) { case -1: /* No console, just die */ cpu_halt(); /* NOTREACHED */ default: break; } } } /* * Check to see if the system paniced, pause and then reboot * according to the specified delay. */ static void shutdown_panic(void *junk, int howto) { int loop; if (howto & RB_DUMP) { if (panic_reboot_wait_time != 0) { if (panic_reboot_wait_time != -1) { printf("Automatic reboot in %d seconds - " "press a key on the console to abort\n", panic_reboot_wait_time); for (loop = panic_reboot_wait_time * 10; loop > 0; --loop) { DELAY(1000 * 100); /* 1/10th second */ /* Did user type a key? */ if (cncheckc() != -1) break; } if (!loop) return; } } else { /* zero time specified - reboot NOW */ return; } printf("--> Press a key on the console to reboot,\n"); printf("--> or switch off the system now.\n"); cngetc(); } } /* * Everything done, now reset */ static void shutdown_reset(void *junk, int howto) { printf("Rebooting...\n"); DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ /* * Acquiring smp_ipi_mtx here has a double effect: * - it disables interrupts avoiding CPU0 preemption * by fast handlers (thus deadlocking against other CPUs) * - it avoids deadlocks against smp_rendezvous() or, more * generally, threads busy-waiting, with this spinlock held, * and waiting for responses by threads on other CPUs * (ie. smp_tlb_shootdown()). * * For the !SMP case it just needs to handle the former problem. */ #ifdef SMP mtx_lock_spin(&smp_ipi_mtx); #else spinlock_enter(); #endif /* cpu_boot(howto); */ /* doesn't do anything at the moment */ cpu_reset(); /* NOTREACHED */ /* assuming reset worked */ } #if defined(WITNESS) || defined(INVARIANT_SUPPORT) static int kassert_warn_only = 0; #ifdef KDB static int kassert_do_kdb = 0; #endif #ifdef KTR static int kassert_do_ktr = 0; #endif static int kassert_do_log = 1; static int kassert_log_pps_limit = 4; static int kassert_log_mute_at = 0; static int kassert_log_panic_at = 0; static int kassert_suppress_in_panic = 0; static int kassert_warnings = 0; SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "kassert options"); #ifdef KASSERT_PANIC_OPTIONAL #define KASSERT_RWTUN CTLFLAG_RWTUN #else #define KASSERT_RWTUN CTLFLAG_RDTUN #endif SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, KASSERT_RWTUN, &kassert_warn_only, 0, "KASSERT triggers a panic (0) or just a warning (1)"); #ifdef KDB SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, KASSERT_RWTUN, &kassert_do_kdb, 0, "KASSERT will enter the debugger"); #endif #ifdef KTR SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, KASSERT_RWTUN, &kassert_do_ktr, 0, "KASSERT does a KTR, set this to the KTRMASK you want"); #endif SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, KASSERT_RWTUN, &kassert_do_log, 0, "If warn_only is enabled, log (1) or do not log (0) assertion violations"); SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RD | CTLFLAG_STATS, &kassert_warnings, 0, "number of KASSERTs that have been triggered"); SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, KASSERT_RWTUN, &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic"); SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, KASSERT_RWTUN, &kassert_log_pps_limit, 0, "limit number of log messages per second"); SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, KASSERT_RWTUN, &kassert_log_mute_at, 0, "max number of KASSERTS to log"); SYSCTL_INT(_debug_kassert, OID_AUTO, suppress_in_panic, KASSERT_RWTUN, &kassert_suppress_in_panic, 0, "KASSERTs will be suppressed while handling a panic"); #undef KASSERT_RWTUN static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_NEEDGIANT, NULL, 0, kassert_sysctl_kassert, "I", "set to trigger a test kassert"); static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS) { int error, i; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error == 0) { i = 0; error = sysctl_handle_int(oidp, &i, 0, req); } if (error != 0 || req->newptr == NULL) return (error); KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i)); return (0); } #ifdef KASSERT_PANIC_OPTIONAL /* * Called by KASSERT, this decides if we will panic * or if we will log via printf and/or ktr. */ void kassert_panic(const char *fmt, ...) { static char buf[256]; va_list ap; va_start(ap, fmt); (void)vsnprintf(buf, sizeof(buf), fmt, ap); va_end(ap); /* * If we are suppressing secondary panics, log the warning but do not * re-enter panic/kdb. */ if (panicstr != NULL && kassert_suppress_in_panic) { if (kassert_do_log) { printf("KASSERT failed: %s\n", buf); #ifdef KDB if (trace_all_panics && trace_on_panic) kdb_backtrace(); #endif } return; } /* * panic if we're not just warning, or if we've exceeded * kassert_log_panic_at warnings. */ if (!kassert_warn_only || (kassert_log_panic_at > 0 && kassert_warnings >= kassert_log_panic_at)) { va_start(ap, fmt); vpanic(fmt, ap); /* NORETURN */ } #ifdef KTR if (kassert_do_ktr) CTR0(ktr_mask, buf); #endif /* KTR */ /* * log if we've not yet met the mute limit. */ if (kassert_do_log && (kassert_log_mute_at == 0 || kassert_warnings < kassert_log_mute_at)) { static struct timeval lasterr; static int curerr; if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) { printf("KASSERT failed: %s\n", buf); kdb_backtrace(); } } #ifdef KDB if (kassert_do_kdb) { kdb_enter(KDB_WHY_KASSERT, buf); } #endif atomic_add_int(&kassert_warnings, 1); } #endif /* KASSERT_PANIC_OPTIONAL */ #endif /* * Panic is called on unresolvable fatal errors. It prints "panic: mesg", * and then reboots. If we are called twice, then we avoid trying to sync * the disks as this often leads to recursive panics. */ void panic(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vpanic(fmt, ap); } void vpanic(const char *fmt, va_list ap) { #ifdef SMP cpuset_t other_cpus; #endif struct thread *td = curthread; int bootopt, newpanic; static char buf[256]; spinlock_enter(); #ifdef SMP /* * stop_cpus_hard(other_cpus) should prevent multiple CPUs from * concurrently entering panic. Only the winner will proceed * further. */ if (panicstr == NULL && !kdb_active) { other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); stop_cpus_hard(other_cpus); } #endif /* * Ensure that the scheduler is stopped while panicking, even if panic * has been entered from kdb. */ td->td_stopsched = 1; bootopt = RB_AUTOBOOT; newpanic = 0; if (panicstr) bootopt |= RB_NOSYNC; else { bootopt |= RB_DUMP; panicstr = fmt; panicked = true; newpanic = 1; } if (newpanic) { (void)vsnprintf(buf, sizeof(buf), fmt, ap); panicstr = buf; cngrab(); printf("panic: %s\n", buf); } else { printf("panic: "); vprintf(fmt, ap); printf("\n"); } #ifdef SMP printf("cpuid = %d\n", PCPU_GET(cpuid)); #endif printf("time = %jd\n", (intmax_t )time_second); #ifdef KDB if ((newpanic || trace_all_panics) && trace_on_panic) kdb_backtrace(); if (debugger_on_panic) kdb_enter(KDB_WHY_PANIC, "panic"); #endif /*thread_lock(td); */ td->td_flags |= TDF_INPANIC; /* thread_unlock(td); */ if (!sync_on_panic) bootopt |= RB_NOSYNC; if (poweroff_on_panic) bootopt |= RB_POWEROFF; if (powercycle_on_panic) bootopt |= RB_POWERCYCLE; kern_reboot(bootopt); } /* * Support for poweroff delay. * * Please note that setting this delay too short might power off your machine * before the write cache on your hard disk has been flushed, leading to * soft-updates inconsistencies. */ #ifndef POWEROFF_DELAY # define POWEROFF_DELAY 5000 #endif static int poweroff_delay = POWEROFF_DELAY; SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)"); static void poweroff_wait(void *junk, int howto) { if ((howto & (RB_POWEROFF | RB_POWERCYCLE)) == 0 || poweroff_delay <= 0) return; DELAY(poweroff_delay * 1000); } /* * Some system processes (e.g. syncer) need to be stopped at appropriate * points in their main loops prior to a system shutdown, so that they * won't interfere with the shutdown process (e.g. by holding a disk buf * to cause sync to fail). For each of these system processes, register * shutdown_kproc() as a handler for one of shutdown events. */ static int kproc_shutdown_wait = 60; SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process"); void kproc_shutdown(void *arg, int howto) { struct proc *p; int error; if (panicstr) return; p = (struct proc *)arg; printf("Waiting (max %d seconds) for system process `%s' to stop... ", kproc_shutdown_wait, p->p_comm); error = kproc_suspend(p, kproc_shutdown_wait * hz); if (error == EWOULDBLOCK) printf("timed out\n"); else printf("done\n"); } void kthread_shutdown(void *arg, int howto) { struct thread *td; int error; if (panicstr) return; td = (struct thread *)arg; printf("Waiting (max %d seconds) for system thread `%s' to stop... ", kproc_shutdown_wait, td->td_name); error = kthread_suspend(td, kproc_shutdown_wait * hz); if (error == EWOULDBLOCK) printf("timed out\n"); else printf("done\n"); } static int dumpdevname_sysctl_handler(SYSCTL_HANDLER_ARGS) { char buf[256]; struct dumperinfo *di; struct sbuf sb; int error; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sb, buf, sizeof(buf), req); mtx_lock(&dumpconf_list_lk); TAILQ_FOREACH(di, &dumper_configs, di_next) { if (di != TAILQ_FIRST(&dumper_configs)) sbuf_putc(&sb, ','); sbuf_cat(&sb, di->di_devname); } mtx_unlock(&dumpconf_list_lk); error = sbuf_finish(&sb); sbuf_delete(&sb); return (error); } SYSCTL_PROC(_kern_shutdown, OID_AUTO, dumpdevname, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &dumper_configs, 0, dumpdevname_sysctl_handler, "A", "Device(s) for kernel dumps"); static int _dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, size_t length); #ifdef EKCD static struct kerneldumpcrypto * kerneldumpcrypto_create(size_t blocksize, uint8_t encryption, const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey) { struct kerneldumpcrypto *kdc; struct kerneldumpkey *kdk; uint32_t dumpkeysize; dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize); kdc = malloc(sizeof(*kdc) + dumpkeysize, M_EKCD, M_WAITOK | M_ZERO); arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0); kdc->kdc_encryption = encryption; switch (kdc->kdc_encryption) { case KERNELDUMP_ENC_AES_256_CBC: if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0) goto failed; break; case KERNELDUMP_ENC_CHACHA20: chacha_keysetup(&kdc->kdc_chacha, key, 256); break; default: goto failed; } kdc->kdc_dumpkeysize = dumpkeysize; kdk = kdc->kdc_dumpkey; kdk->kdk_encryption = kdc->kdc_encryption; memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); kdk->kdk_encryptedkeysize = htod32(encryptedkeysize); memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize); return (kdc); failed: - explicit_bzero(kdc, sizeof(*kdc) + dumpkeysize); - free(kdc, M_EKCD); + zfree(kdc, M_EKCD); return (NULL); } static int kerneldumpcrypto_init(struct kerneldumpcrypto *kdc) { uint8_t hash[SHA256_DIGEST_LENGTH]; SHA256_CTX ctx; struct kerneldumpkey *kdk; int error; error = 0; if (kdc == NULL) return (0); /* * When a user enters ddb it can write a crash dump multiple times. * Each time it should be encrypted using a different IV. */ SHA256_Init(&ctx); SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv)); SHA256_Final(hash, &ctx); bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv)); switch (kdc->kdc_encryption) { case KERNELDUMP_ENC_AES_256_CBC: if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, kdc->kdc_iv) <= 0) { error = EINVAL; goto out; } break; case KERNELDUMP_ENC_CHACHA20: chacha_ivsetup(&kdc->kdc_chacha, kdc->kdc_iv, NULL); break; default: error = EINVAL; goto out; } kdk = kdc->kdc_dumpkey; memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); out: explicit_bzero(hash, sizeof(hash)); return (error); } static uint32_t kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc) { if (kdc == NULL) return (0); return (kdc->kdc_dumpkeysize); } #endif /* EKCD */ static struct kerneldumpcomp * kerneldumpcomp_create(struct dumperinfo *di, uint8_t compression) { struct kerneldumpcomp *kdcomp; int format; switch (compression) { case KERNELDUMP_COMP_GZIP: format = COMPRESS_GZIP; break; case KERNELDUMP_COMP_ZSTD: format = COMPRESS_ZSTD; break; default: return (NULL); } kdcomp = malloc(sizeof(*kdcomp), M_DUMPER, M_WAITOK | M_ZERO); kdcomp->kdc_format = compression; kdcomp->kdc_stream = compressor_init(kerneldumpcomp_write_cb, format, di->maxiosize, kerneldump_gzlevel, di); if (kdcomp->kdc_stream == NULL) { free(kdcomp, M_DUMPER); return (NULL); } kdcomp->kdc_buf = malloc(di->maxiosize, M_DUMPER, M_WAITOK | M_NODUMP); return (kdcomp); } static void kerneldumpcomp_destroy(struct dumperinfo *di) { struct kerneldumpcomp *kdcomp; kdcomp = di->kdcomp; if (kdcomp == NULL) return; compressor_fini(kdcomp->kdc_stream); - explicit_bzero(kdcomp->kdc_buf, di->maxiosize); - free(kdcomp->kdc_buf, M_DUMPER); + zfree(kdcomp->kdc_buf, M_DUMPER); free(kdcomp, M_DUMPER); } /* * Must not be present on global list. */ static void free_single_dumper(struct dumperinfo *di) { if (di == NULL) return; - if (di->blockbuf != NULL) { - explicit_bzero(di->blockbuf, di->blocksize); - free(di->blockbuf, M_DUMPER); - } + zfree(di->blockbuf, M_DUMPER); kerneldumpcomp_destroy(di); #ifdef EKCD - if (di->kdcrypto != NULL) { - explicit_bzero(di->kdcrypto, sizeof(*di->kdcrypto) + - di->kdcrypto->kdc_dumpkeysize); - free(di->kdcrypto, M_EKCD); - } + zfree(di->kdcrypto, M_EKCD); #endif - - explicit_bzero(di, sizeof(*di)); - free(di, M_DUMPER); + zfree(di, M_DUMPER); } /* Registration of dumpers */ int dumper_insert(const struct dumperinfo *di_template, const char *devname, const struct diocskerneldump_arg *kda) { struct dumperinfo *newdi, *listdi; bool inserted; uint8_t index; int error; index = kda->kda_index; MPASS(index != KDA_REMOVE && index != KDA_REMOVE_DEV && index != KDA_REMOVE_ALL); error = priv_check(curthread, PRIV_SETDUMPER); if (error != 0) return (error); newdi = malloc(sizeof(*newdi) + strlen(devname) + 1, M_DUMPER, M_WAITOK | M_ZERO); memcpy(newdi, di_template, sizeof(*newdi)); newdi->blockbuf = NULL; newdi->kdcrypto = NULL; newdi->kdcomp = NULL; strcpy(newdi->di_devname, devname); if (kda->kda_encryption != KERNELDUMP_ENC_NONE) { #ifdef EKCD newdi->kdcrypto = kerneldumpcrypto_create(di_template->blocksize, kda->kda_encryption, kda->kda_key, kda->kda_encryptedkeysize, kda->kda_encryptedkey); if (newdi->kdcrypto == NULL) { error = EINVAL; goto cleanup; } #else error = EOPNOTSUPP; goto cleanup; #endif } if (kda->kda_compression != KERNELDUMP_COMP_NONE) { #ifdef EKCD /* * We can't support simultaneous unpadded block cipher * encryption and compression because there is no guarantee the * length of the compressed result is exactly a multiple of the * cipher block size. */ if (kda->kda_encryption == KERNELDUMP_ENC_AES_256_CBC) { error = EOPNOTSUPP; goto cleanup; } #endif newdi->kdcomp = kerneldumpcomp_create(newdi, kda->kda_compression); if (newdi->kdcomp == NULL) { error = EINVAL; goto cleanup; } } newdi->blockbuf = malloc(newdi->blocksize, M_DUMPER, M_WAITOK | M_ZERO); /* Add the new configuration to the queue */ mtx_lock(&dumpconf_list_lk); inserted = false; TAILQ_FOREACH(listdi, &dumper_configs, di_next) { if (index == 0) { TAILQ_INSERT_BEFORE(listdi, newdi, di_next); inserted = true; break; } index--; } if (!inserted) TAILQ_INSERT_TAIL(&dumper_configs, newdi, di_next); mtx_unlock(&dumpconf_list_lk); return (0); cleanup: free_single_dumper(newdi); return (error); } #ifdef DDB void dumper_ddb_insert(struct dumperinfo *newdi) { TAILQ_INSERT_HEAD(&dumper_configs, newdi, di_next); } void dumper_ddb_remove(struct dumperinfo *di) { TAILQ_REMOVE(&dumper_configs, di, di_next); } #endif static bool dumper_config_match(const struct dumperinfo *di, const char *devname, const struct diocskerneldump_arg *kda) { if (kda->kda_index == KDA_REMOVE_ALL) return (true); if (strcmp(di->di_devname, devname) != 0) return (false); /* * Allow wildcard removal of configs matching a device on g_dev_orphan. */ if (kda->kda_index == KDA_REMOVE_DEV) return (true); if (di->kdcomp != NULL) { if (di->kdcomp->kdc_format != kda->kda_compression) return (false); } else if (kda->kda_compression != KERNELDUMP_COMP_NONE) return (false); #ifdef EKCD if (di->kdcrypto != NULL) { if (di->kdcrypto->kdc_encryption != kda->kda_encryption) return (false); /* * Do we care to verify keys match to delete? It seems weird * to expect multiple fallback dump configurations on the same * device that only differ in crypto key. */ } else #endif if (kda->kda_encryption != KERNELDUMP_ENC_NONE) return (false); return (true); } int dumper_remove(const char *devname, const struct diocskerneldump_arg *kda) { struct dumperinfo *di, *sdi; bool found; int error; error = priv_check(curthread, PRIV_SETDUMPER); if (error != 0) return (error); /* * Try to find a matching configuration, and kill it. * * NULL 'kda' indicates remove any configuration matching 'devname', * which may remove multiple configurations in atypical configurations. */ found = false; mtx_lock(&dumpconf_list_lk); TAILQ_FOREACH_SAFE(di, &dumper_configs, di_next, sdi) { if (dumper_config_match(di, devname, kda)) { found = true; TAILQ_REMOVE(&dumper_configs, di, di_next); free_single_dumper(di); } } mtx_unlock(&dumpconf_list_lk); /* Only produce ENOENT if a more targeted match didn't match. */ if (!found && kda->kda_index == KDA_REMOVE) return (ENOENT); return (0); } static int dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length) { if (di->mediasize > 0 && length != 0 && (offset < di->mediaoffset || offset - di->mediaoffset + length > di->mediasize)) { if (di->kdcomp != NULL && offset >= di->mediaoffset) { printf( "Compressed dump failed to fit in device boundaries.\n"); return (E2BIG); } printf("Attempt to write outside dump device boundaries.\n" "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", (intmax_t)offset, (intmax_t)di->mediaoffset, (uintmax_t)length, (intmax_t)di->mediasize); return (ENOSPC); } if (length % di->blocksize != 0) { printf("Attempt to write partial block of length %ju.\n", (uintmax_t)length); return (EINVAL); } if (offset % di->blocksize != 0) { printf("Attempt to write at unaligned offset %jd.\n", (intmax_t)offset); return (EINVAL); } return (0); } #ifdef EKCD static int dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size) { switch (kdc->kdc_encryption) { case KERNELDUMP_ENC_AES_256_CBC: if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf, 8 * size, buf) <= 0) { return (EIO); } if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, buf + size - 16 /* IV size for AES-256-CBC */) <= 0) { return (EIO); } break; case KERNELDUMP_ENC_CHACHA20: chacha_encrypt_bytes(&kdc->kdc_chacha, buf, buf, size); break; default: return (EINVAL); } return (0); } /* Encrypt data and call dumper. */ static int dump_encrypted_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, off_t offset, size_t length) { static uint8_t buf[KERNELDUMP_BUFFER_SIZE]; struct kerneldumpcrypto *kdc; int error; size_t nbytes; kdc = di->kdcrypto; while (length > 0) { nbytes = MIN(length, sizeof(buf)); bcopy(virtual, buf, nbytes); if (dump_encrypt(kdc, buf, nbytes) != 0) return (EIO); error = dump_write(di, buf, physical, offset, nbytes); if (error != 0) return (error); offset += nbytes; virtual = (void *)((uint8_t *)virtual + nbytes); length -= nbytes; } return (0); } #endif /* EKCD */ static int kerneldumpcomp_write_cb(void *base, size_t length, off_t offset, void *arg) { struct dumperinfo *di; size_t resid, rlength; int error; di = arg; if (length % di->blocksize != 0) { /* * This must be the final write after flushing the compression * stream. Write as many full blocks as possible and stash the * residual data in the dumper's block buffer. It will be * padded and written in dump_finish(). */ rlength = rounddown(length, di->blocksize); if (rlength != 0) { error = _dump_append(di, base, 0, rlength); if (error != 0) return (error); } resid = length - rlength; memmove(di->blockbuf, (uint8_t *)base + rlength, resid); di->kdcomp->kdc_resid = resid; return (EAGAIN); } return (_dump_append(di, base, 0, length)); } /* * Write kernel dump headers at the beginning and end of the dump extent. * Write the kernel dump encryption key after the leading header if we were * configured to do so. */ static int dump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh) { #ifdef EKCD struct kerneldumpcrypto *kdc; #endif void *buf, *key; size_t hdrsz; uint64_t extent; uint32_t keysize; int error; hdrsz = sizeof(*kdh); if (hdrsz > di->blocksize) return (ENOMEM); #ifdef EKCD kdc = di->kdcrypto; key = kdc->kdc_dumpkey; keysize = kerneldumpcrypto_dumpkeysize(kdc); #else key = NULL; keysize = 0; #endif /* * If the dump device has special handling for headers, let it take care * of writing them out. */ if (di->dumper_hdr != NULL) return (di->dumper_hdr(di, kdh, key, keysize)); if (hdrsz == di->blocksize) buf = kdh; else { buf = di->blockbuf; memset(buf, 0, di->blocksize); memcpy(buf, kdh, hdrsz); } extent = dtoh64(kdh->dumpextent); #ifdef EKCD if (kdc != NULL) { error = dump_write(di, kdc->kdc_dumpkey, 0, di->mediaoffset + di->mediasize - di->blocksize - extent - keysize, keysize); if (error != 0) return (error); } #endif error = dump_write(di, buf, 0, di->mediaoffset + di->mediasize - 2 * di->blocksize - extent - keysize, di->blocksize); if (error == 0) error = dump_write(di, buf, 0, di->mediaoffset + di->mediasize - di->blocksize, di->blocksize); return (error); } /* * Don't touch the first SIZEOF_METADATA bytes on the dump device. This is to * protect us from metadata and metadata from us. */ #define SIZEOF_METADATA (64 * 1024) /* * Do some preliminary setup for a kernel dump: initialize state for encryption, * if requested, and make sure that we have enough space on the dump device. * * We set things up so that the dump ends before the last sector of the dump * device, at which the trailing header is written. * * +-----------+------+-----+----------------------------+------+ * | | lhdr | key | ... kernel dump ... | thdr | * +-----------+------+-----+----------------------------+------+ * 1 blk opt <------- dump extent --------> 1 blk * * Dumps written using dump_append() start at the beginning of the extent. * Uncompressed dumps will use the entire extent, but compressed dumps typically * will not. The true length of the dump is recorded in the leading and trailing * headers once the dump has been completed. * * The dump device may provide a callback, in which case it will initialize * dumpoff and take care of laying out the headers. */ int dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh) { uint64_t dumpextent, span; uint32_t keysize; int error; #ifdef EKCD error = kerneldumpcrypto_init(di->kdcrypto); if (error != 0) return (error); keysize = kerneldumpcrypto_dumpkeysize(di->kdcrypto); #else error = 0; keysize = 0; #endif if (di->dumper_start != NULL) { error = di->dumper_start(di); } else { dumpextent = dtoh64(kdh->dumpextent); span = SIZEOF_METADATA + dumpextent + 2 * di->blocksize + keysize; if (di->mediasize < span) { if (di->kdcomp == NULL) return (E2BIG); /* * We don't yet know how much space the compressed dump * will occupy, so try to use the whole swap partition * (minus the first 64KB) in the hope that the * compressed dump will fit. If that doesn't turn out to * be enough, the bounds checking in dump_write() * will catch us and cause the dump to fail. */ dumpextent = di->mediasize - span + dumpextent; kdh->dumpextent = htod64(dumpextent); } /* * The offset at which to begin writing the dump. */ di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize - dumpextent; } di->origdumpoff = di->dumpoff; return (error); } static int _dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, size_t length) { int error; #ifdef EKCD if (di->kdcrypto != NULL) error = dump_encrypted_write(di, virtual, physical, di->dumpoff, length); else #endif error = dump_write(di, virtual, physical, di->dumpoff, length); if (error == 0) di->dumpoff += length; return (error); } /* * Write to the dump device starting at dumpoff. When compression is enabled, * writes to the device will be performed using a callback that gets invoked * when the compression stream's output buffer is full. */ int dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, size_t length) { void *buf; if (di->kdcomp != NULL) { /* Bounce through a buffer to avoid CRC errors. */ if (length > di->maxiosize) return (EINVAL); buf = di->kdcomp->kdc_buf; memmove(buf, virtual, length); return (compressor_write(di->kdcomp->kdc_stream, buf, length)); } return (_dump_append(di, virtual, physical, length)); } /* * Write to the dump device at the specified offset. */ int dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, off_t offset, size_t length) { int error; error = dump_check_bounds(di, offset, length); if (error != 0) return (error); return (di->dumper(di->priv, virtual, physical, offset, length)); } /* * Perform kernel dump finalization: flush the compression stream, if necessary, * write the leading and trailing kernel dump headers now that we know the true * length of the dump, and optionally write the encryption key following the * leading header. */ int dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh) { int error; if (di->kdcomp != NULL) { error = compressor_flush(di->kdcomp->kdc_stream); if (error == EAGAIN) { /* We have residual data in di->blockbuf. */ error = dump_write(di, di->blockbuf, 0, di->dumpoff, di->blocksize); di->dumpoff += di->kdcomp->kdc_resid; di->kdcomp->kdc_resid = 0; } if (error != 0) return (error); /* * We now know the size of the compressed dump, so update the * header accordingly and recompute parity. */ kdh->dumplength = htod64(di->dumpoff - di->origdumpoff); kdh->parity = 0; kdh->parity = kerneldump_parity(kdh); compressor_reset(di->kdcomp->kdc_stream); } error = dump_write_headers(di, kdh); if (error != 0) return (error); (void)dump_write(di, NULL, 0, 0, 0); return (0); } void dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh, const char *magic, uint32_t archver, uint64_t dumplen) { size_t dstsize; bzero(kdh, sizeof(*kdh)); strlcpy(kdh->magic, magic, sizeof(kdh->magic)); strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); kdh->version = htod32(KERNELDUMPVERSION); kdh->architectureversion = htod32(archver); kdh->dumplength = htod64(dumplen); kdh->dumpextent = kdh->dumplength; kdh->dumptime = htod64(time_second); #ifdef EKCD kdh->dumpkeysize = htod32(kerneldumpcrypto_dumpkeysize(di->kdcrypto)); #else kdh->dumpkeysize = 0; #endif kdh->blocksize = htod32(di->blocksize); strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); dstsize = sizeof(kdh->versionstring); if (strlcpy(kdh->versionstring, version, dstsize) >= dstsize) kdh->versionstring[dstsize - 2] = '\n'; if (panicstr != NULL) strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); if (di->kdcomp != NULL) kdh->compression = di->kdcomp->kdc_format; kdh->parity = kerneldump_parity(kdh); } #ifdef DDB DB_SHOW_COMMAND(panic, db_show_panic) { if (panicstr == NULL) db_printf("panicstr not set\n"); else db_printf("panic: %s\n", panicstr); } #endif Index: head/sys/kern/uipc_ktls.c =================================================================== --- head/sys/kern/uipc_ktls.c (revision 362623) +++ head/sys/kern/uipc_ktls.c (revision 362624) @@ -1,1651 +1,1648 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2014-2019 Netflix Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) #include #endif #include #include #include #ifdef RSS #include #include #endif #include #include #if defined(INET) || defined(INET6) #include #include #endif #include #ifdef TCP_OFFLOAD #include #endif #include #include #include #include #include struct ktls_wq { struct mtx mtx; STAILQ_HEAD(, mbuf) head; bool running; } __aligned(CACHE_LINE_SIZE); static struct ktls_wq *ktls_wq; static struct proc *ktls_proc; LIST_HEAD(, ktls_crypto_backend) ktls_backends; static struct rmlock ktls_backends_lock; static uma_zone_t ktls_session_zone; static uint16_t ktls_cpuid_lookup[MAXCPU]; SYSCTL_NODE(_kern_ipc, OID_AUTO, tls, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Kernel TLS offload"); SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Kernel TLS offload stats"); static int ktls_allow_unload; SYSCTL_INT(_kern_ipc_tls, OID_AUTO, allow_unload, CTLFLAG_RDTUN, &ktls_allow_unload, 0, "Allow software crypto modules to unload"); #ifdef RSS static int ktls_bind_threads = 1; #else static int ktls_bind_threads; #endif SYSCTL_INT(_kern_ipc_tls, OID_AUTO, bind_threads, CTLFLAG_RDTUN, &ktls_bind_threads, 0, "Bind crypto threads to cores or domains at boot"); static u_int ktls_maxlen = 16384; SYSCTL_UINT(_kern_ipc_tls, OID_AUTO, maxlen, CTLFLAG_RWTUN, &ktls_maxlen, 0, "Maximum TLS record size"); static int ktls_number_threads; SYSCTL_INT(_kern_ipc_tls_stats, OID_AUTO, threads, CTLFLAG_RD, &ktls_number_threads, 0, "Number of TLS threads in thread-pool"); static bool ktls_offload_enable; SYSCTL_BOOL(_kern_ipc_tls, OID_AUTO, enable, CTLFLAG_RW, &ktls_offload_enable, 0, "Enable support for kernel TLS offload"); static bool ktls_cbc_enable = true; SYSCTL_BOOL(_kern_ipc_tls, OID_AUTO, cbc_enable, CTLFLAG_RW, &ktls_cbc_enable, 1, "Enable Support of AES-CBC crypto for kernel TLS"); static counter_u64_t ktls_tasks_active; SYSCTL_COUNTER_U64(_kern_ipc_tls, OID_AUTO, tasks_active, CTLFLAG_RD, &ktls_tasks_active, "Number of active tasks"); static counter_u64_t ktls_cnt_on; SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, so_inqueue, CTLFLAG_RD, &ktls_cnt_on, "Number of TLS records in queue to tasks for SW crypto"); static counter_u64_t ktls_offload_total; SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, offload_total, CTLFLAG_RD, &ktls_offload_total, "Total successful TLS setups (parameters set)"); static counter_u64_t ktls_offload_enable_calls; SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, enable_calls, CTLFLAG_RD, &ktls_offload_enable_calls, "Total number of TLS enable calls made"); static counter_u64_t ktls_offload_active; SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, active, CTLFLAG_RD, &ktls_offload_active, "Total Active TLS sessions"); static counter_u64_t ktls_offload_failed_crypto; SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, failed_crypto, CTLFLAG_RD, &ktls_offload_failed_crypto, "Total TLS crypto failures"); static counter_u64_t ktls_switch_to_ifnet; SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, switch_to_ifnet, CTLFLAG_RD, &ktls_switch_to_ifnet, "TLS sessions switched from SW to ifnet"); static counter_u64_t ktls_switch_to_sw; SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, switch_to_sw, CTLFLAG_RD, &ktls_switch_to_sw, "TLS sessions switched from ifnet to SW"); static counter_u64_t ktls_switch_failed; SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, switch_failed, CTLFLAG_RD, &ktls_switch_failed, "TLS sessions unable to switch between SW and ifnet"); SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, sw, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Software TLS session stats"); SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, ifnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Hardware (ifnet) TLS session stats"); #ifdef TCP_OFFLOAD SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, toe, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "TOE TLS session stats"); #endif static counter_u64_t ktls_sw_cbc; SYSCTL_COUNTER_U64(_kern_ipc_tls_sw, OID_AUTO, cbc, CTLFLAG_RD, &ktls_sw_cbc, "Active number of software TLS sessions using AES-CBC"); static counter_u64_t ktls_sw_gcm; SYSCTL_COUNTER_U64(_kern_ipc_tls_sw, OID_AUTO, gcm, CTLFLAG_RD, &ktls_sw_gcm, "Active number of software TLS sessions using AES-GCM"); static counter_u64_t ktls_ifnet_cbc; SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, cbc, CTLFLAG_RD, &ktls_ifnet_cbc, "Active number of ifnet TLS sessions using AES-CBC"); static counter_u64_t ktls_ifnet_gcm; SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, gcm, CTLFLAG_RD, &ktls_ifnet_gcm, "Active number of ifnet TLS sessions using AES-GCM"); static counter_u64_t ktls_ifnet_reset; SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, reset, CTLFLAG_RD, &ktls_ifnet_reset, "TLS sessions updated to a new ifnet send tag"); static counter_u64_t ktls_ifnet_reset_dropped; SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, reset_dropped, CTLFLAG_RD, &ktls_ifnet_reset_dropped, "TLS sessions dropped after failing to update ifnet send tag"); static counter_u64_t ktls_ifnet_reset_failed; SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, reset_failed, CTLFLAG_RD, &ktls_ifnet_reset_failed, "TLS sessions that failed to allocate a new ifnet send tag"); static int ktls_ifnet_permitted; SYSCTL_UINT(_kern_ipc_tls_ifnet, OID_AUTO, permitted, CTLFLAG_RWTUN, &ktls_ifnet_permitted, 1, "Whether to permit hardware (ifnet) TLS sessions"); #ifdef TCP_OFFLOAD static counter_u64_t ktls_toe_cbc; SYSCTL_COUNTER_U64(_kern_ipc_tls_toe, OID_AUTO, cbc, CTLFLAG_RD, &ktls_toe_cbc, "Active number of TOE TLS sessions using AES-CBC"); static counter_u64_t ktls_toe_gcm; SYSCTL_COUNTER_U64(_kern_ipc_tls_toe, OID_AUTO, gcm, CTLFLAG_RD, &ktls_toe_gcm, "Active number of TOE TLS sessions using AES-GCM"); #endif static MALLOC_DEFINE(M_KTLS, "ktls", "Kernel TLS"); static void ktls_cleanup(struct ktls_session *tls); #if defined(INET) || defined(INET6) static void ktls_reset_send_tag(void *context, int pending); #endif static void ktls_work_thread(void *ctx); int ktls_crypto_backend_register(struct ktls_crypto_backend *be) { struct ktls_crypto_backend *curr_be, *tmp; if (be->api_version != KTLS_API_VERSION) { printf("KTLS: API version mismatch (%d vs %d) for %s\n", be->api_version, KTLS_API_VERSION, be->name); return (EINVAL); } rm_wlock(&ktls_backends_lock); printf("KTLS: Registering crypto method %s with prio %d\n", be->name, be->prio); if (LIST_EMPTY(&ktls_backends)) { LIST_INSERT_HEAD(&ktls_backends, be, next); } else { LIST_FOREACH_SAFE(curr_be, &ktls_backends, next, tmp) { if (curr_be->prio < be->prio) { LIST_INSERT_BEFORE(curr_be, be, next); break; } if (LIST_NEXT(curr_be, next) == NULL) { LIST_INSERT_AFTER(curr_be, be, next); break; } } } rm_wunlock(&ktls_backends_lock); return (0); } int ktls_crypto_backend_deregister(struct ktls_crypto_backend *be) { struct ktls_crypto_backend *tmp; /* * Don't error if the backend isn't registered. This permits * MOD_UNLOAD handlers to use this function unconditionally. */ rm_wlock(&ktls_backends_lock); LIST_FOREACH(tmp, &ktls_backends, next) { if (tmp == be) break; } if (tmp == NULL) { rm_wunlock(&ktls_backends_lock); return (0); } if (!ktls_allow_unload) { rm_wunlock(&ktls_backends_lock); printf( "KTLS: Deregistering crypto method %s is not supported\n", be->name); return (EBUSY); } if (be->use_count) { rm_wunlock(&ktls_backends_lock); return (EBUSY); } LIST_REMOVE(be, next); rm_wunlock(&ktls_backends_lock); return (0); } #if defined(INET) || defined(INET6) static u_int ktls_get_cpu(struct socket *so) { struct inpcb *inp; u_int cpuid; inp = sotoinpcb(so); #ifdef RSS cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype); if (cpuid != NETISR_CPUID_NONE) return (cpuid); #endif /* * Just use the flowid to shard connections in a repeatable * fashion. Note that some crypto backends rely on the * serialization provided by having the same connection use * the same queue. */ cpuid = ktls_cpuid_lookup[inp->inp_flowid % ktls_number_threads]; return (cpuid); } #endif static void ktls_init(void *dummy __unused) { struct thread *td; struct pcpu *pc; cpuset_t mask; int error, i; ktls_tasks_active = counter_u64_alloc(M_WAITOK); ktls_cnt_on = counter_u64_alloc(M_WAITOK); ktls_offload_total = counter_u64_alloc(M_WAITOK); ktls_offload_enable_calls = counter_u64_alloc(M_WAITOK); ktls_offload_active = counter_u64_alloc(M_WAITOK); ktls_offload_failed_crypto = counter_u64_alloc(M_WAITOK); ktls_switch_to_ifnet = counter_u64_alloc(M_WAITOK); ktls_switch_to_sw = counter_u64_alloc(M_WAITOK); ktls_switch_failed = counter_u64_alloc(M_WAITOK); ktls_sw_cbc = counter_u64_alloc(M_WAITOK); ktls_sw_gcm = counter_u64_alloc(M_WAITOK); ktls_ifnet_cbc = counter_u64_alloc(M_WAITOK); ktls_ifnet_gcm = counter_u64_alloc(M_WAITOK); ktls_ifnet_reset = counter_u64_alloc(M_WAITOK); ktls_ifnet_reset_dropped = counter_u64_alloc(M_WAITOK); ktls_ifnet_reset_failed = counter_u64_alloc(M_WAITOK); #ifdef TCP_OFFLOAD ktls_toe_cbc = counter_u64_alloc(M_WAITOK); ktls_toe_gcm = counter_u64_alloc(M_WAITOK); #endif rm_init(&ktls_backends_lock, "ktls backends"); LIST_INIT(&ktls_backends); ktls_wq = malloc(sizeof(*ktls_wq) * (mp_maxid + 1), M_KTLS, M_WAITOK | M_ZERO); ktls_session_zone = uma_zcreate("ktls_session", sizeof(struct ktls_session), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); /* * Initialize the workqueues to run the TLS work. We create a * work queue for each CPU. */ CPU_FOREACH(i) { STAILQ_INIT(&ktls_wq[i].head); mtx_init(&ktls_wq[i].mtx, "ktls work queue", NULL, MTX_DEF); error = kproc_kthread_add(ktls_work_thread, &ktls_wq[i], &ktls_proc, &td, 0, 0, "KTLS", "thr_%d", i); if (error) panic("Can't add KTLS thread %d error %d", i, error); /* * Bind threads to cores. If ktls_bind_threads is > * 1, then we bind to the NUMA domain. */ if (ktls_bind_threads) { if (ktls_bind_threads > 1) { pc = pcpu_find(i); CPU_COPY(&cpuset_domain[pc->pc_domain], &mask); } else { CPU_SETOF(i, &mask); } error = cpuset_setthread(td->td_tid, &mask); if (error) panic( "Unable to bind KTLS thread for CPU %d error %d", i, error); } ktls_cpuid_lookup[ktls_number_threads] = i; ktls_number_threads++; } printf("KTLS: Initialized %d threads\n", ktls_number_threads); } SYSINIT(ktls, SI_SUB_SMP + 1, SI_ORDER_ANY, ktls_init, NULL); #if defined(INET) || defined(INET6) static int ktls_create_session(struct socket *so, struct tls_enable *en, struct ktls_session **tlsp) { struct ktls_session *tls; int error; /* Only TLS 1.0 - 1.3 are supported. */ if (en->tls_vmajor != TLS_MAJOR_VER_ONE) return (EINVAL); if (en->tls_vminor < TLS_MINOR_VER_ZERO || en->tls_vminor > TLS_MINOR_VER_THREE) return (EINVAL); if (en->auth_key_len < 0 || en->auth_key_len > TLS_MAX_PARAM_SIZE) return (EINVAL); if (en->cipher_key_len < 0 || en->cipher_key_len > TLS_MAX_PARAM_SIZE) return (EINVAL); if (en->iv_len < 0 || en->iv_len > sizeof(tls->params.iv)) return (EINVAL); /* All supported algorithms require a cipher key. */ if (en->cipher_key_len == 0) return (EINVAL); /* No flags are currently supported. */ if (en->flags != 0) return (EINVAL); /* Common checks for supported algorithms. */ switch (en->cipher_algorithm) { case CRYPTO_AES_NIST_GCM_16: /* * auth_algorithm isn't used, but permit GMAC values * for compatibility. */ switch (en->auth_algorithm) { case 0: #ifdef COMPAT_FREEBSD12 /* XXX: Really 13.0-current COMPAT. */ case CRYPTO_AES_128_NIST_GMAC: case CRYPTO_AES_192_NIST_GMAC: case CRYPTO_AES_256_NIST_GMAC: #endif break; default: return (EINVAL); } if (en->auth_key_len != 0) return (EINVAL); if ((en->tls_vminor == TLS_MINOR_VER_TWO && en->iv_len != TLS_AEAD_GCM_LEN) || (en->tls_vminor == TLS_MINOR_VER_THREE && en->iv_len != TLS_1_3_GCM_IV_LEN)) return (EINVAL); break; case CRYPTO_AES_CBC: switch (en->auth_algorithm) { case CRYPTO_SHA1_HMAC: /* * TLS 1.0 requires an implicit IV. TLS 1.1+ * all use explicit IVs. */ if (en->tls_vminor == TLS_MINOR_VER_ZERO) { if (en->iv_len != TLS_CBC_IMPLICIT_IV_LEN) return (EINVAL); break; } /* FALLTHROUGH */ case CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384_HMAC: /* Ignore any supplied IV. */ en->iv_len = 0; break; default: return (EINVAL); } if (en->auth_key_len == 0) return (EINVAL); break; default: return (EINVAL); } tls = uma_zalloc(ktls_session_zone, M_WAITOK | M_ZERO); counter_u64_add(ktls_offload_active, 1); refcount_init(&tls->refcount, 1); TASK_INIT(&tls->reset_tag_task, 0, ktls_reset_send_tag, tls); tls->wq_index = ktls_get_cpu(so); tls->params.cipher_algorithm = en->cipher_algorithm; tls->params.auth_algorithm = en->auth_algorithm; tls->params.tls_vmajor = en->tls_vmajor; tls->params.tls_vminor = en->tls_vminor; tls->params.flags = en->flags; tls->params.max_frame_len = min(TLS_MAX_MSG_SIZE_V10_2, ktls_maxlen); /* Set the header and trailer lengths. */ tls->params.tls_hlen = sizeof(struct tls_record_layer); switch (en->cipher_algorithm) { case CRYPTO_AES_NIST_GCM_16: /* * TLS 1.2 uses a 4 byte implicit IV with an explicit 8 byte * nonce. TLS 1.3 uses a 12 byte implicit IV. */ if (en->tls_vminor < TLS_MINOR_VER_THREE) tls->params.tls_hlen += sizeof(uint64_t); tls->params.tls_tlen = AES_GMAC_HASH_LEN; /* * TLS 1.3 includes optional padding which we * do not support, and also puts the "real" record * type at the end of the encrypted data. */ if (en->tls_vminor == TLS_MINOR_VER_THREE) tls->params.tls_tlen += sizeof(uint8_t); tls->params.tls_bs = 1; break; case CRYPTO_AES_CBC: switch (en->auth_algorithm) { case CRYPTO_SHA1_HMAC: if (en->tls_vminor == TLS_MINOR_VER_ZERO) { /* Implicit IV, no nonce. */ } else { tls->params.tls_hlen += AES_BLOCK_LEN; } tls->params.tls_tlen = AES_BLOCK_LEN + SHA1_HASH_LEN; break; case CRYPTO_SHA2_256_HMAC: tls->params.tls_hlen += AES_BLOCK_LEN; tls->params.tls_tlen = AES_BLOCK_LEN + SHA2_256_HASH_LEN; break; case CRYPTO_SHA2_384_HMAC: tls->params.tls_hlen += AES_BLOCK_LEN; tls->params.tls_tlen = AES_BLOCK_LEN + SHA2_384_HASH_LEN; break; default: panic("invalid hmac"); } tls->params.tls_bs = AES_BLOCK_LEN; break; default: panic("invalid cipher"); } KASSERT(tls->params.tls_hlen <= MBUF_PEXT_HDR_LEN, ("TLS header length too long: %d", tls->params.tls_hlen)); KASSERT(tls->params.tls_tlen <= MBUF_PEXT_TRAIL_LEN, ("TLS trailer length too long: %d", tls->params.tls_tlen)); if (en->auth_key_len != 0) { tls->params.auth_key_len = en->auth_key_len; tls->params.auth_key = malloc(en->auth_key_len, M_KTLS, M_WAITOK); error = copyin(en->auth_key, tls->params.auth_key, en->auth_key_len); if (error) goto out; } tls->params.cipher_key_len = en->cipher_key_len; tls->params.cipher_key = malloc(en->cipher_key_len, M_KTLS, M_WAITOK); error = copyin(en->cipher_key, tls->params.cipher_key, en->cipher_key_len); if (error) goto out; /* * This holds the implicit portion of the nonce for GCM and * the initial implicit IV for TLS 1.0. The explicit portions * of the IV are generated in ktls_frame(). */ if (en->iv_len != 0) { tls->params.iv_len = en->iv_len; error = copyin(en->iv, tls->params.iv, en->iv_len); if (error) goto out; /* * For TLS 1.2, generate an 8-byte nonce as a counter * to generate unique explicit IVs. * * Store this counter in the last 8 bytes of the IV * array so that it is 8-byte aligned. */ if (en->cipher_algorithm == CRYPTO_AES_NIST_GCM_16 && en->tls_vminor == TLS_MINOR_VER_TWO) arc4rand(tls->params.iv + 8, sizeof(uint64_t), 0); } *tlsp = tls; return (0); out: ktls_cleanup(tls); return (error); } static struct ktls_session * ktls_clone_session(struct ktls_session *tls) { struct ktls_session *tls_new; tls_new = uma_zalloc(ktls_session_zone, M_WAITOK | M_ZERO); counter_u64_add(ktls_offload_active, 1); refcount_init(&tls_new->refcount, 1); /* Copy fields from existing session. */ tls_new->params = tls->params; tls_new->wq_index = tls->wq_index; /* Deep copy keys. */ if (tls_new->params.auth_key != NULL) { tls_new->params.auth_key = malloc(tls->params.auth_key_len, M_KTLS, M_WAITOK); memcpy(tls_new->params.auth_key, tls->params.auth_key, tls->params.auth_key_len); } tls_new->params.cipher_key = malloc(tls->params.cipher_key_len, M_KTLS, M_WAITOK); memcpy(tls_new->params.cipher_key, tls->params.cipher_key, tls->params.cipher_key_len); return (tls_new); } #endif static void ktls_cleanup(struct ktls_session *tls) { counter_u64_add(ktls_offload_active, -1); switch (tls->mode) { case TCP_TLS_MODE_SW: MPASS(tls->be != NULL); switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: counter_u64_add(ktls_sw_cbc, -1); break; case CRYPTO_AES_NIST_GCM_16: counter_u64_add(ktls_sw_gcm, -1); break; } tls->free(tls); break; case TCP_TLS_MODE_IFNET: switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: counter_u64_add(ktls_ifnet_cbc, -1); break; case CRYPTO_AES_NIST_GCM_16: counter_u64_add(ktls_ifnet_gcm, -1); break; } m_snd_tag_rele(tls->snd_tag); break; #ifdef TCP_OFFLOAD case TCP_TLS_MODE_TOE: switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: counter_u64_add(ktls_toe_cbc, -1); break; case CRYPTO_AES_NIST_GCM_16: counter_u64_add(ktls_toe_gcm, -1); break; } break; #endif } if (tls->params.auth_key != NULL) { - explicit_bzero(tls->params.auth_key, tls->params.auth_key_len); - free(tls->params.auth_key, M_KTLS); + zfree(tls->params.auth_key, M_KTLS); tls->params.auth_key = NULL; tls->params.auth_key_len = 0; } if (tls->params.cipher_key != NULL) { - explicit_bzero(tls->params.cipher_key, - tls->params.cipher_key_len); - free(tls->params.cipher_key, M_KTLS); + zfree(tls->params.cipher_key, M_KTLS); tls->params.cipher_key = NULL; tls->params.cipher_key_len = 0; } explicit_bzero(tls->params.iv, sizeof(tls->params.iv)); } #if defined(INET) || defined(INET6) #ifdef TCP_OFFLOAD static int ktls_try_toe(struct socket *so, struct ktls_session *tls, int direction) { struct inpcb *inp; struct tcpcb *tp; int error; inp = so->so_pcb; INP_WLOCK(inp); if (inp->inp_flags2 & INP_FREED) { INP_WUNLOCK(inp); return (ECONNRESET); } if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { INP_WUNLOCK(inp); return (ECONNRESET); } if (inp->inp_socket == NULL) { INP_WUNLOCK(inp); return (ECONNRESET); } tp = intotcpcb(inp); if (tp->tod == NULL) { INP_WUNLOCK(inp); return (EOPNOTSUPP); } error = tcp_offload_alloc_tls_session(tp, tls, direction); INP_WUNLOCK(inp); if (error == 0) { tls->mode = TCP_TLS_MODE_TOE; switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: counter_u64_add(ktls_toe_cbc, 1); break; case CRYPTO_AES_NIST_GCM_16: counter_u64_add(ktls_toe_gcm, 1); break; } } return (error); } #endif /* * Common code used when first enabling ifnet TLS on a connection or * when allocating a new ifnet TLS session due to a routing change. * This function allocates a new TLS send tag on whatever interface * the connection is currently routed over. */ static int ktls_alloc_snd_tag(struct inpcb *inp, struct ktls_session *tls, bool force, struct m_snd_tag **mstp) { union if_snd_tag_alloc_params params; struct ifnet *ifp; struct nhop_object *nh; struct tcpcb *tp; int error; INP_RLOCK(inp); if (inp->inp_flags2 & INP_FREED) { INP_RUNLOCK(inp); return (ECONNRESET); } if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { INP_RUNLOCK(inp); return (ECONNRESET); } if (inp->inp_socket == NULL) { INP_RUNLOCK(inp); return (ECONNRESET); } tp = intotcpcb(inp); /* * Check administrative controls on ifnet TLS to determine if * ifnet TLS should be denied. * * - Always permit 'force' requests. * - ktls_ifnet_permitted == 0: always deny. */ if (!force && ktls_ifnet_permitted == 0) { INP_RUNLOCK(inp); return (ENXIO); } /* * XXX: Use the cached route in the inpcb to find the * interface. This should perhaps instead use * rtalloc1_fib(dst, 0, 0, fibnum). Since KTLS is only * enabled after a connection has completed key negotiation in * userland, the cached route will be present in practice. */ nh = inp->inp_route.ro_nh; if (nh == NULL) { INP_RUNLOCK(inp); return (ENXIO); } ifp = nh->nh_ifp; if_ref(ifp); params.hdr.type = IF_SND_TAG_TYPE_TLS; params.hdr.flowid = inp->inp_flowid; params.hdr.flowtype = inp->inp_flowtype; params.hdr.numa_domain = inp->inp_numa_domain; params.tls.inp = inp; params.tls.tls = tls; INP_RUNLOCK(inp); if (ifp->if_snd_tag_alloc == NULL) { error = EOPNOTSUPP; goto out; } if ((ifp->if_capenable & IFCAP_NOMAP) == 0) { error = EOPNOTSUPP; goto out; } if (inp->inp_vflag & INP_IPV6) { if ((ifp->if_capenable & IFCAP_TXTLS6) == 0) { error = EOPNOTSUPP; goto out; } } else { if ((ifp->if_capenable & IFCAP_TXTLS4) == 0) { error = EOPNOTSUPP; goto out; } } error = ifp->if_snd_tag_alloc(ifp, ¶ms, mstp); out: if_rele(ifp); return (error); } static int ktls_try_ifnet(struct socket *so, struct ktls_session *tls, bool force) { struct m_snd_tag *mst; int error; error = ktls_alloc_snd_tag(so->so_pcb, tls, force, &mst); if (error == 0) { tls->mode = TCP_TLS_MODE_IFNET; tls->snd_tag = mst; switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: counter_u64_add(ktls_ifnet_cbc, 1); break; case CRYPTO_AES_NIST_GCM_16: counter_u64_add(ktls_ifnet_gcm, 1); break; } } return (error); } static int ktls_try_sw(struct socket *so, struct ktls_session *tls) { struct rm_priotracker prio; struct ktls_crypto_backend *be; /* * Choose the best software crypto backend. Backends are * stored in sorted priority order (larget value == most * important at the head of the list), so this just stops on * the first backend that claims the session by returning * success. */ if (ktls_allow_unload) rm_rlock(&ktls_backends_lock, &prio); LIST_FOREACH(be, &ktls_backends, next) { if (be->try(so, tls) == 0) break; KASSERT(tls->cipher == NULL, ("ktls backend leaked a cipher pointer")); } if (be != NULL) { if (ktls_allow_unload) be->use_count++; tls->be = be; } if (ktls_allow_unload) rm_runlock(&ktls_backends_lock, &prio); if (be == NULL) return (EOPNOTSUPP); tls->mode = TCP_TLS_MODE_SW; switch (tls->params.cipher_algorithm) { case CRYPTO_AES_CBC: counter_u64_add(ktls_sw_cbc, 1); break; case CRYPTO_AES_NIST_GCM_16: counter_u64_add(ktls_sw_gcm, 1); break; } return (0); } int ktls_enable_rx(struct socket *so, struct tls_enable *en) { struct ktls_session *tls; int error; if (!ktls_offload_enable) return (ENOTSUP); counter_u64_add(ktls_offload_enable_calls, 1); /* * This should always be true since only the TCP socket option * invokes this function. */ if (so->so_proto->pr_protocol != IPPROTO_TCP) return (EINVAL); /* * XXX: Don't overwrite existing sessions. We should permit * this to support rekeying in the future. */ if (so->so_rcv.sb_tls_info != NULL) return (EALREADY); if (en->cipher_algorithm == CRYPTO_AES_CBC && !ktls_cbc_enable) return (ENOTSUP); error = ktls_create_session(so, en, &tls); if (error) return (error); /* TLS RX offload is only supported on TOE currently. */ #ifdef TCP_OFFLOAD error = ktls_try_toe(so, tls, KTLS_RX); #else error = EOPNOTSUPP; #endif if (error) { ktls_cleanup(tls); return (error); } /* Mark the socket as using TLS offload. */ SOCKBUF_LOCK(&so->so_rcv); so->so_rcv.sb_tls_info = tls; SOCKBUF_UNLOCK(&so->so_rcv); counter_u64_add(ktls_offload_total, 1); return (0); } int ktls_enable_tx(struct socket *so, struct tls_enable *en) { struct ktls_session *tls; int error; if (!ktls_offload_enable) return (ENOTSUP); counter_u64_add(ktls_offload_enable_calls, 1); /* * This should always be true since only the TCP socket option * invokes this function. */ if (so->so_proto->pr_protocol != IPPROTO_TCP) return (EINVAL); /* * XXX: Don't overwrite existing sessions. We should permit * this to support rekeying in the future. */ if (so->so_snd.sb_tls_info != NULL) return (EALREADY); if (en->cipher_algorithm == CRYPTO_AES_CBC && !ktls_cbc_enable) return (ENOTSUP); /* TLS requires ext pgs */ if (mb_use_ext_pgs == 0) return (ENXIO); error = ktls_create_session(so, en, &tls); if (error) return (error); /* Prefer TOE -> ifnet TLS -> software TLS. */ #ifdef TCP_OFFLOAD error = ktls_try_toe(so, tls, KTLS_TX); if (error) #endif error = ktls_try_ifnet(so, tls, false); if (error) error = ktls_try_sw(so, tls); if (error) { ktls_cleanup(tls); return (error); } error = sblock(&so->so_snd, SBL_WAIT); if (error) { ktls_cleanup(tls); return (error); } SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_tls_seqno = be64dec(en->rec_seq); so->so_snd.sb_tls_info = tls; if (tls->mode != TCP_TLS_MODE_SW) so->so_snd.sb_flags |= SB_TLS_IFNET; SOCKBUF_UNLOCK(&so->so_snd); sbunlock(&so->so_snd); counter_u64_add(ktls_offload_total, 1); return (0); } int ktls_get_rx_mode(struct socket *so) { struct ktls_session *tls; struct inpcb *inp; int mode; inp = so->so_pcb; INP_WLOCK_ASSERT(inp); SOCKBUF_LOCK(&so->so_rcv); tls = so->so_rcv.sb_tls_info; if (tls == NULL) mode = TCP_TLS_MODE_NONE; else mode = tls->mode; SOCKBUF_UNLOCK(&so->so_rcv); return (mode); } int ktls_get_tx_mode(struct socket *so) { struct ktls_session *tls; struct inpcb *inp; int mode; inp = so->so_pcb; INP_WLOCK_ASSERT(inp); SOCKBUF_LOCK(&so->so_snd); tls = so->so_snd.sb_tls_info; if (tls == NULL) mode = TCP_TLS_MODE_NONE; else mode = tls->mode; SOCKBUF_UNLOCK(&so->so_snd); return (mode); } /* * Switch between SW and ifnet TLS sessions as requested. */ int ktls_set_tx_mode(struct socket *so, int mode) { struct ktls_session *tls, *tls_new; struct inpcb *inp; int error; switch (mode) { case TCP_TLS_MODE_SW: case TCP_TLS_MODE_IFNET: break; default: return (EINVAL); } inp = so->so_pcb; INP_WLOCK_ASSERT(inp); SOCKBUF_LOCK(&so->so_snd); tls = so->so_snd.sb_tls_info; if (tls == NULL) { SOCKBUF_UNLOCK(&so->so_snd); return (0); } if (tls->mode == mode) { SOCKBUF_UNLOCK(&so->so_snd); return (0); } tls = ktls_hold(tls); SOCKBUF_UNLOCK(&so->so_snd); INP_WUNLOCK(inp); tls_new = ktls_clone_session(tls); if (mode == TCP_TLS_MODE_IFNET) error = ktls_try_ifnet(so, tls_new, true); else error = ktls_try_sw(so, tls_new); if (error) { counter_u64_add(ktls_switch_failed, 1); ktls_free(tls_new); ktls_free(tls); INP_WLOCK(inp); return (error); } error = sblock(&so->so_snd, SBL_WAIT); if (error) { counter_u64_add(ktls_switch_failed, 1); ktls_free(tls_new); ktls_free(tls); INP_WLOCK(inp); return (error); } /* * If we raced with another session change, keep the existing * session. */ if (tls != so->so_snd.sb_tls_info) { counter_u64_add(ktls_switch_failed, 1); sbunlock(&so->so_snd); ktls_free(tls_new); ktls_free(tls); INP_WLOCK(inp); return (EBUSY); } SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_tls_info = tls_new; if (tls_new->mode != TCP_TLS_MODE_SW) so->so_snd.sb_flags |= SB_TLS_IFNET; SOCKBUF_UNLOCK(&so->so_snd); sbunlock(&so->so_snd); /* * Drop two references on 'tls'. The first is for the * ktls_hold() above. The second drops the reference from the * socket buffer. */ KASSERT(tls->refcount >= 2, ("too few references on old session")); ktls_free(tls); ktls_free(tls); if (mode == TCP_TLS_MODE_IFNET) counter_u64_add(ktls_switch_to_ifnet, 1); else counter_u64_add(ktls_switch_to_sw, 1); INP_WLOCK(inp); return (0); } /* * Try to allocate a new TLS send tag. This task is scheduled when * ip_output detects a route change while trying to transmit a packet * holding a TLS record. If a new tag is allocated, replace the tag * in the TLS session. Subsequent packets on the connection will use * the new tag. If a new tag cannot be allocated, drop the * connection. */ static void ktls_reset_send_tag(void *context, int pending) { struct epoch_tracker et; struct ktls_session *tls; struct m_snd_tag *old, *new; struct inpcb *inp; struct tcpcb *tp; int error; MPASS(pending == 1); tls = context; inp = tls->inp; /* * Free the old tag first before allocating a new one. * ip[6]_output_send() will treat a NULL send tag the same as * an ifp mismatch and drop packets until a new tag is * allocated. * * Write-lock the INP when changing tls->snd_tag since * ip[6]_output_send() holds a read-lock when reading the * pointer. */ INP_WLOCK(inp); old = tls->snd_tag; tls->snd_tag = NULL; INP_WUNLOCK(inp); if (old != NULL) m_snd_tag_rele(old); error = ktls_alloc_snd_tag(inp, tls, true, &new); if (error == 0) { INP_WLOCK(inp); tls->snd_tag = new; mtx_pool_lock(mtxpool_sleep, tls); tls->reset_pending = false; mtx_pool_unlock(mtxpool_sleep, tls); if (!in_pcbrele_wlocked(inp)) INP_WUNLOCK(inp); counter_u64_add(ktls_ifnet_reset, 1); /* * XXX: Should we kick tcp_output explicitly now that * the send tag is fixed or just rely on timers? */ } else { NET_EPOCH_ENTER(et); INP_WLOCK(inp); if (!in_pcbrele_wlocked(inp)) { if (!(inp->inp_flags & INP_TIMEWAIT) && !(inp->inp_flags & INP_DROPPED)) { tp = intotcpcb(inp); CURVNET_SET(tp->t_vnet); tp = tcp_drop(tp, ECONNABORTED); CURVNET_RESTORE(); if (tp != NULL) INP_WUNLOCK(inp); counter_u64_add(ktls_ifnet_reset_dropped, 1); } else INP_WUNLOCK(inp); } NET_EPOCH_EXIT(et); counter_u64_add(ktls_ifnet_reset_failed, 1); /* * Leave reset_pending true to avoid future tasks while * the socket goes away. */ } ktls_free(tls); } int ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls) { if (inp == NULL) return (ENOBUFS); INP_LOCK_ASSERT(inp); /* * See if we should schedule a task to update the send tag for * this session. */ mtx_pool_lock(mtxpool_sleep, tls); if (!tls->reset_pending) { (void) ktls_hold(tls); in_pcbref(inp); tls->inp = inp; tls->reset_pending = true; taskqueue_enqueue(taskqueue_thread, &tls->reset_tag_task); } mtx_pool_unlock(mtxpool_sleep, tls); return (ENOBUFS); } #endif void ktls_destroy(struct ktls_session *tls) { struct rm_priotracker prio; ktls_cleanup(tls); if (tls->be != NULL && ktls_allow_unload) { rm_rlock(&ktls_backends_lock, &prio); tls->be->use_count--; rm_runlock(&ktls_backends_lock, &prio); } uma_zfree(ktls_session_zone, tls); } void ktls_seq(struct sockbuf *sb, struct mbuf *m) { for (; m != NULL; m = m->m_next) { KASSERT((m->m_flags & M_EXTPG) != 0, ("ktls_seq: mapped mbuf %p", m)); m->m_epg_seqno = sb->sb_tls_seqno; sb->sb_tls_seqno++; } } /* * Add TLS framing (headers and trailers) to a chain of mbufs. Each * mbuf in the chain must be an unmapped mbuf. The payload of the * mbuf must be populated with the payload of each TLS record. * * The record_type argument specifies the TLS record type used when * populating the TLS header. * * The enq_count argument on return is set to the number of pages of * payload data for this entire chain that need to be encrypted via SW * encryption. The returned value should be passed to ktls_enqueue * when scheduling encryption of this chain of mbufs. */ void ktls_frame(struct mbuf *top, struct ktls_session *tls, int *enq_cnt, uint8_t record_type) { struct tls_record_layer *tlshdr; struct mbuf *m; uint64_t *noncep; uint16_t tls_len; int maxlen; maxlen = tls->params.max_frame_len; *enq_cnt = 0; for (m = top; m != NULL; m = m->m_next) { /* * All mbufs in the chain should be non-empty TLS * records whose payload does not exceed the maximum * frame length. */ KASSERT(m->m_len <= maxlen && m->m_len > 0, ("ktls_frame: m %p len %d\n", m, m->m_len)); /* * TLS frames require unmapped mbufs to store session * info. */ KASSERT((m->m_flags & M_EXTPG) != 0, ("ktls_frame: mapped mbuf %p (top = %p)\n", m, top)); tls_len = m->m_len; /* Save a reference to the session. */ m->m_epg_tls = ktls_hold(tls); m->m_epg_hdrlen = tls->params.tls_hlen; m->m_epg_trllen = tls->params.tls_tlen; if (tls->params.cipher_algorithm == CRYPTO_AES_CBC) { int bs, delta; /* * AES-CBC pads messages to a multiple of the * block size. Note that the padding is * applied after the digest and the encryption * is done on the "plaintext || mac || padding". * At least one byte of padding is always * present. * * Compute the final trailer length assuming * at most one block of padding. * tls->params.sb_tls_tlen is the maximum * possible trailer length (padding + digest). * delta holds the number of excess padding * bytes if the maximum were used. Those * extra bytes are removed. */ bs = tls->params.tls_bs; delta = (tls_len + tls->params.tls_tlen) & (bs - 1); m->m_epg_trllen -= delta; } m->m_len += m->m_epg_hdrlen + m->m_epg_trllen; /* Populate the TLS header. */ tlshdr = (void *)m->m_epg_hdr; tlshdr->tls_vmajor = tls->params.tls_vmajor; /* * TLS 1.3 masquarades as TLS 1.2 with a record type * of TLS_RLTYPE_APP. */ if (tls->params.tls_vminor == TLS_MINOR_VER_THREE && tls->params.tls_vmajor == TLS_MAJOR_VER_ONE) { tlshdr->tls_vminor = TLS_MINOR_VER_TWO; tlshdr->tls_type = TLS_RLTYPE_APP; /* save the real record type for later */ m->m_epg_record_type = record_type; m->m_epg_trail[0] = record_type; } else { tlshdr->tls_vminor = tls->params.tls_vminor; tlshdr->tls_type = record_type; } tlshdr->tls_length = htons(m->m_len - sizeof(*tlshdr)); /* * Store nonces / explicit IVs after the end of the * TLS header. * * For GCM with TLS 1.2, an 8 byte nonce is copied * from the end of the IV. The nonce is then * incremented for use by the next record. * * For CBC, a random nonce is inserted for TLS 1.1+. */ if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16 && tls->params.tls_vminor == TLS_MINOR_VER_TWO) { noncep = (uint64_t *)(tls->params.iv + 8); be64enc(tlshdr + 1, *noncep); (*noncep)++; } else if (tls->params.cipher_algorithm == CRYPTO_AES_CBC && tls->params.tls_vminor >= TLS_MINOR_VER_ONE) arc4rand(tlshdr + 1, AES_BLOCK_LEN, 0); /* * When using SW encryption, mark the mbuf not ready. * It will be marked ready via sbready() after the * record has been encrypted. * * When using ifnet TLS, unencrypted TLS records are * sent down the stack to the NIC. */ if (tls->mode == TCP_TLS_MODE_SW) { m->m_flags |= M_NOTREADY; m->m_epg_nrdy = m->m_epg_npgs; *enq_cnt += m->m_epg_npgs; } } } void ktls_enqueue_to_free(struct mbuf *m) { struct ktls_wq *wq; bool running; /* Mark it for freeing. */ m->m_epg_flags |= EPG_FLAG_2FREE; wq = &ktls_wq[m->m_epg_tls->wq_index]; mtx_lock(&wq->mtx); STAILQ_INSERT_TAIL(&wq->head, m, m_epg_stailq); running = wq->running; mtx_unlock(&wq->mtx); if (!running) wakeup(wq); } void ktls_enqueue(struct mbuf *m, struct socket *so, int page_count) { struct ktls_wq *wq; bool running; KASSERT(((m->m_flags & (M_EXTPG | M_NOTREADY)) == (M_EXTPG | M_NOTREADY)), ("ktls_enqueue: %p not unready & nomap mbuf\n", m)); KASSERT(page_count != 0, ("enqueueing TLS mbuf with zero page count")); KASSERT(m->m_epg_tls->mode == TCP_TLS_MODE_SW, ("!SW TLS mbuf")); m->m_epg_enc_cnt = page_count; /* * Save a pointer to the socket. The caller is responsible * for taking an additional reference via soref(). */ m->m_epg_so = so; wq = &ktls_wq[m->m_epg_tls->wq_index]; mtx_lock(&wq->mtx); STAILQ_INSERT_TAIL(&wq->head, m, m_epg_stailq); running = wq->running; mtx_unlock(&wq->mtx); if (!running) wakeup(wq); counter_u64_add(ktls_cnt_on, 1); } static __noinline void ktls_encrypt(struct mbuf *top) { struct ktls_session *tls; struct socket *so; struct mbuf *m; vm_paddr_t parray[1 + btoc(TLS_MAX_MSG_SIZE_V10_2)]; struct iovec src_iov[1 + btoc(TLS_MAX_MSG_SIZE_V10_2)]; struct iovec dst_iov[1 + btoc(TLS_MAX_MSG_SIZE_V10_2)]; vm_page_t pg; int error, i, len, npages, off, total_pages; bool is_anon; so = top->m_epg_so; tls = top->m_epg_tls; KASSERT(tls != NULL, ("tls = NULL, top = %p\n", top)); KASSERT(so != NULL, ("so = NULL, top = %p\n", top)); #ifdef INVARIANTS top->m_epg_so = NULL; #endif total_pages = top->m_epg_enc_cnt; npages = 0; /* * Encrypt the TLS records in the chain of mbufs starting with * 'top'. 'total_pages' gives us a total count of pages and is * used to know when we have finished encrypting the TLS * records originally queued with 'top'. * * NB: These mbufs are queued in the socket buffer and * 'm_next' is traversing the mbufs in the socket buffer. The * socket buffer lock is not held while traversing this chain. * Since the mbufs are all marked M_NOTREADY their 'm_next' * pointers should be stable. However, the 'm_next' of the * last mbuf encrypted is not necessarily NULL. It can point * to other mbufs appended while 'top' was on the TLS work * queue. * * Each mbuf holds an entire TLS record. */ error = 0; for (m = top; npages != total_pages; m = m->m_next) { KASSERT(m->m_epg_tls == tls, ("different TLS sessions in a single mbuf chain: %p vs %p", tls, m->m_epg_tls)); KASSERT((m->m_flags & (M_EXTPG | M_NOTREADY)) == (M_EXTPG | M_NOTREADY), ("%p not unready & nomap mbuf (top = %p)\n", m, top)); KASSERT(npages + m->m_epg_npgs <= total_pages, ("page count mismatch: top %p, total_pages %d, m %p", top, total_pages, m)); /* * Generate source and destination ivoecs to pass to * the SW encryption backend. For writable mbufs, the * destination iovec is a copy of the source and * encryption is done in place. For file-backed mbufs * (from sendfile), anonymous wired pages are * allocated and assigned to the destination iovec. */ is_anon = (m->m_epg_flags & EPG_FLAG_ANON) != 0; off = m->m_epg_1st_off; for (i = 0; i < m->m_epg_npgs; i++, off = 0) { len = m_epg_pagelen(m, i, off); src_iov[i].iov_len = len; src_iov[i].iov_base = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[i]) + off; if (is_anon) { dst_iov[i].iov_base = src_iov[i].iov_base; dst_iov[i].iov_len = src_iov[i].iov_len; continue; } retry_page: pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP | VM_ALLOC_WIRED); if (pg == NULL) { vm_wait(NULL); goto retry_page; } parray[i] = VM_PAGE_TO_PHYS(pg); dst_iov[i].iov_base = (char *)(void *)PHYS_TO_DMAP(parray[i]) + off; dst_iov[i].iov_len = len; } npages += i; error = (*tls->sw_encrypt)(tls, (const struct tls_record_layer *)m->m_epg_hdr, m->m_epg_trail, src_iov, dst_iov, i, m->m_epg_seqno, m->m_epg_record_type); if (error) { counter_u64_add(ktls_offload_failed_crypto, 1); break; } /* * For file-backed mbufs, release the file-backed * pages and replace them in the ext_pgs array with * the anonymous wired pages allocated above. */ if (!is_anon) { /* Free the old pages. */ m->m_ext.ext_free(m); /* Replace them with the new pages. */ for (i = 0; i < m->m_epg_npgs; i++) m->m_epg_pa[i] = parray[i]; /* Use the basic free routine. */ m->m_ext.ext_free = mb_free_mext_pgs; /* Pages are now writable. */ m->m_epg_flags |= EPG_FLAG_ANON; } /* * Drop a reference to the session now that it is no * longer needed. Existing code depends on encrypted * records having no associated session vs * yet-to-be-encrypted records having an associated * session. */ m->m_epg_tls = NULL; ktls_free(tls); } CURVNET_SET(so->so_vnet); if (error == 0) { (void)(*so->so_proto->pr_usrreqs->pru_ready)(so, top, npages); } else { so->so_proto->pr_usrreqs->pru_abort(so); so->so_error = EIO; mb_free_notready(top, total_pages); } SOCK_LOCK(so); sorele(so); CURVNET_RESTORE(); } static void ktls_work_thread(void *ctx) { struct ktls_wq *wq = ctx; struct mbuf *m, *n; STAILQ_HEAD(, mbuf) local_head; #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) fpu_kern_thread(0); #endif for (;;) { mtx_lock(&wq->mtx); while (STAILQ_EMPTY(&wq->head)) { wq->running = false; mtx_sleep(wq, &wq->mtx, 0, "-", 0); wq->running = true; } STAILQ_INIT(&local_head); STAILQ_CONCAT(&local_head, &wq->head); mtx_unlock(&wq->mtx); STAILQ_FOREACH_SAFE(m, &local_head, m_epg_stailq, n) { if (m->m_epg_flags & EPG_FLAG_2FREE) { ktls_free(m->m_epg_tls); uma_zfree(zone_mbuf, m); } else { ktls_encrypt(m); counter_u64_add(ktls_cnt_on, -1); } } } } Index: head/sys/netinet/netdump/netdump_client.c =================================================================== --- head/sys/netinet/netdump/netdump_client.c (revision 362623) +++ head/sys/netinet/netdump/netdump_client.c (revision 362624) @@ -1,819 +1,815 @@ /*- * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved. * Copyright (c) 2000 Darrell Anderson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * netdump_client.c * FreeBSD subsystem supporting netdump network dumps. * A dedicated server must be running to accept client dumps. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NETDDEBUGV(f, ...) do { \ if (nd_debug > 1) \ printf(("%s: " f), __func__, ## __VA_ARGS__); \ } while (0) static int netdump_configure(struct diocskerneldump_arg *, struct thread *); static int netdump_dumper(void *priv __unused, void *virtual, vm_offset_t physical __unused, off_t offset, size_t length); static bool netdump_enabled(void); static int netdump_enabled_sysctl(SYSCTL_HANDLER_ARGS); static int netdump_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr, int flags __unused, struct thread *td); static int netdump_modevent(module_t mod, int type, void *priv); static int netdump_start(struct dumperinfo *di); static void netdump_unconfigure(void); /* Must be at least as big as the chunks dumpsys() gives us. */ static unsigned char nd_buf[MAXDUMPPGS * PAGE_SIZE]; static int dump_failed; /* Configuration parameters. */ static struct { char ndc_iface[IFNAMSIZ]; union kd_ip ndc_server; union kd_ip ndc_client; union kd_ip ndc_gateway; uint8_t ndc_af; /* Runtime State */ struct debugnet_pcb *nd_pcb; off_t nd_tx_off; size_t nd_buf_len; } nd_conf; #define nd_server nd_conf.ndc_server.in4 #define nd_client nd_conf.ndc_client.in4 #define nd_gateway nd_conf.ndc_gateway.in4 /* General dynamic settings. */ static struct sx nd_conf_lk; SX_SYSINIT(nd_conf, &nd_conf_lk, "netdump configuration lock"); #define NETDUMP_WLOCK() sx_xlock(&nd_conf_lk) #define NETDUMP_WUNLOCK() sx_xunlock(&nd_conf_lk) #define NETDUMP_RLOCK() sx_slock(&nd_conf_lk) #define NETDUMP_RUNLOCK() sx_sunlock(&nd_conf_lk) #define NETDUMP_ASSERT_WLOCKED() sx_assert(&nd_conf_lk, SA_XLOCKED) #define NETDUMP_ASSERT_LOCKED() sx_assert(&nd_conf_lk, SA_LOCKED) static struct ifnet *nd_ifp; static eventhandler_tag nd_detach_cookie; FEATURE(netdump, "Netdump client support"); static SYSCTL_NODE(_net, OID_AUTO, netdump, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "netdump parameters"); static int nd_debug; SYSCTL_INT(_net_netdump, OID_AUTO, debug, CTLFLAG_RWTUN, &nd_debug, 0, "Debug message verbosity"); SYSCTL_PROC(_net_netdump, OID_AUTO, enabled, CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE, NULL, 0, netdump_enabled_sysctl, "I", "netdump configuration status"); static char nd_path[MAXPATHLEN]; SYSCTL_STRING(_net_netdump, OID_AUTO, path, CTLFLAG_RW, nd_path, sizeof(nd_path), "Server path for output files"); /* * The following three variables were moved to debugnet(4), but these knobs * were retained as aliases. */ SYSCTL_INT(_net_netdump, OID_AUTO, polls, CTLFLAG_RWTUN, &debugnet_npolls, 0, "Number of times to poll before assuming packet loss (0.5ms per poll)"); SYSCTL_INT(_net_netdump, OID_AUTO, retries, CTLFLAG_RWTUN, &debugnet_nretries, 0, "Number of retransmit attempts before giving up"); SYSCTL_INT(_net_netdump, OID_AUTO, arp_retries, CTLFLAG_RWTUN, &debugnet_arp_nretries, 0, "Number of ARP attempts before giving up"); static bool nd_is_enabled; static bool netdump_enabled(void) { NETDUMP_ASSERT_LOCKED(); return (nd_is_enabled); } static void netdump_set_enabled(bool status) { NETDUMP_ASSERT_LOCKED(); nd_is_enabled = status; } static int netdump_enabled_sysctl(SYSCTL_HANDLER_ARGS) { int en, error; NETDUMP_RLOCK(); en = netdump_enabled(); NETDUMP_RUNLOCK(); error = SYSCTL_OUT(req, &en, sizeof(en)); if (error != 0 || req->newptr == NULL) return (error); return (EPERM); } /*- * Dumping specific primitives. */ /* * Flush any buffered vmcore data. */ static int netdump_flush_buf(void) { int error; error = 0; if (nd_conf.nd_buf_len != 0) { struct debugnet_proto_aux auxdata = { .dp_offset_start = nd_conf.nd_tx_off, }; error = debugnet_send(nd_conf.nd_pcb, DEBUGNET_DATA, nd_buf, nd_conf.nd_buf_len, &auxdata); if (error == 0) nd_conf.nd_buf_len = 0; } return (error); } /* * Callback from dumpsys() to dump a chunk of memory. * Copies it out to our static buffer then sends it across the network. * Detects the initial KDH and makes sure it is given a special packet type. * * Parameters: * priv Unused. Optional private pointer. * virtual Virtual address (where to read the data from) * physical Unused. Physical memory address. * offset Offset from start of core file * length Data length * * Return value: * 0 on success * errno on error */ static int netdump_dumper(void *priv __unused, void *virtual, vm_offset_t physical __unused, off_t offset, size_t length) { int error; NETDDEBUGV("netdump_dumper(NULL, %p, NULL, %ju, %zu)\n", virtual, (uintmax_t)offset, length); if (virtual == NULL) { error = netdump_flush_buf(); if (error != 0) dump_failed = 1; if (dump_failed != 0) printf("failed to dump the kernel core\n"); else if ( debugnet_sendempty(nd_conf.nd_pcb, DEBUGNET_FINISHED) != 0) printf("failed to close the transaction\n"); else printf("\nnetdump finished.\n"); debugnet_free(nd_conf.nd_pcb); nd_conf.nd_pcb = NULL; return (0); } if (length > sizeof(nd_buf)) return (ENOSPC); if (nd_conf.nd_buf_len + length > sizeof(nd_buf) || (nd_conf.nd_buf_len != 0 && nd_conf.nd_tx_off + nd_conf.nd_buf_len != offset)) { error = netdump_flush_buf(); if (error != 0) { dump_failed = 1; return (error); } nd_conf.nd_tx_off = offset; } memmove(nd_buf + nd_conf.nd_buf_len, virtual, length); nd_conf.nd_buf_len += length; return (0); } /* * Perform any initialization needed prior to transmitting the kernel core. */ static int netdump_start(struct dumperinfo *di) { struct debugnet_conn_params dcp; struct debugnet_pcb *pcb; char buf[INET_ADDRSTRLEN]; int error; error = 0; /* Check if the dumping is allowed to continue. */ if (!netdump_enabled()) return (EINVAL); if (!KERNEL_PANICKED()) { printf( "netdump_start: netdump may only be used after a panic\n"); return (EINVAL); } memset(&dcp, 0, sizeof(dcp)); if (nd_server.s_addr == INADDR_ANY) { printf("netdump_start: can't netdump; no server IP given\n"); return (EINVAL); } /* We start dumping at offset 0. */ di->dumpoff = 0; dcp.dc_ifp = nd_ifp; dcp.dc_client = nd_client.s_addr; dcp.dc_server = nd_server.s_addr; dcp.dc_gateway = nd_gateway.s_addr; dcp.dc_herald_port = NETDUMP_PORT; dcp.dc_client_port = NETDUMP_ACKPORT; dcp.dc_herald_data = nd_path; dcp.dc_herald_datalen = (nd_path[0] == 0) ? 0 : strlen(nd_path) + 1; error = debugnet_connect(&dcp, &pcb); if (error != 0) { printf("failed to contact netdump server\n"); /* Squash debugnet to something the dumper code understands. */ return (EINVAL); } printf("netdumping to %s (%6D)\n", inet_ntoa_r(nd_server, buf), debugnet_get_gw_mac(pcb), ":"); nd_conf.nd_pcb = pcb; return (0); } static int netdump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh, void *key, uint32_t keysize) { int error; error = netdump_flush_buf(); if (error != 0) return (error); memcpy(nd_buf, kdh, sizeof(*kdh)); error = debugnet_send(nd_conf.nd_pcb, NETDUMP_KDH, nd_buf, sizeof(*kdh), NULL); if (error == 0 && keysize > 0) { if (keysize > sizeof(nd_buf)) return (EINVAL); memcpy(nd_buf, key, keysize); error = debugnet_send(nd_conf.nd_pcb, NETDUMP_EKCD_KEY, nd_buf, keysize, NULL); } return (error); } /*- * KLD specific code. */ static struct cdevsw netdump_cdevsw = { .d_version = D_VERSION, .d_ioctl = netdump_ioctl, .d_name = "netdump", }; static struct cdev *netdump_cdev; static void netdump_unconfigure(void) { struct diocskerneldump_arg kda; NETDUMP_ASSERT_WLOCKED(); KASSERT(netdump_enabled(), ("%s: not enabled", __func__)); bzero(&kda, sizeof(kda)); kda.kda_index = KDA_REMOVE_DEV; (void)dumper_remove(nd_conf.ndc_iface, &kda); if (nd_ifp != NULL) if_rele(nd_ifp); nd_ifp = NULL; netdump_set_enabled(false); log(LOG_WARNING, "netdump: Lost configured interface %s\n", nd_conf.ndc_iface); bzero(&nd_conf, sizeof(nd_conf)); } static void netdump_ifdetach(void *arg __unused, struct ifnet *ifp) { NETDUMP_WLOCK(); if (ifp == nd_ifp) netdump_unconfigure(); NETDUMP_WUNLOCK(); } /* * td of NULL is a sentinel value that indicates a kernel caller (ddb(4) or * modload-based tunable parameters). */ static int netdump_configure(struct diocskerneldump_arg *conf, struct thread *td) { struct ifnet *ifp; NETDUMP_ASSERT_WLOCKED(); if (conf->kda_iface[0] != 0) { if (td != NULL && !IS_DEFAULT_VNET(TD_TO_VNET(td))) return (EINVAL); CURVNET_SET(vnet0); ifp = ifunit_ref(conf->kda_iface); CURVNET_RESTORE(); } else ifp = NULL; if (nd_ifp != NULL) if_rele(nd_ifp); nd_ifp = ifp; netdump_set_enabled(true); #define COPY_SIZED(elm) do { \ _Static_assert(sizeof(nd_conf.ndc_ ## elm) == \ sizeof(conf->kda_ ## elm), "elm " __XSTRING(elm) " mismatch"); \ memcpy(&nd_conf.ndc_ ## elm, &conf->kda_ ## elm, \ sizeof(nd_conf.ndc_ ## elm)); \ } while (0) COPY_SIZED(iface); COPY_SIZED(server); COPY_SIZED(client); COPY_SIZED(gateway); COPY_SIZED(af); #undef COPY_SIZED return (0); } /* * ioctl(2) handler for the netdump device. This is currently only used to * register netdump as a dump device. * * Parameters: * dev, Unused. * cmd, The ioctl to be handled. * addr, The parameter for the ioctl. * flags, Unused. * td, The thread invoking this ioctl. * * Returns: * 0 on success, and an errno value on failure. */ static int netdump_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr, int flags __unused, struct thread *td) { struct diocskerneldump_arg kda_copy, *conf; struct dumperinfo dumper; uint8_t *encryptedkey; int error; #ifdef COMPAT_FREEBSD11 u_int u; #endif #ifdef COMPAT_FREEBSD12 struct diocskerneldump_arg_freebsd12 *kda12; struct netdump_conf_freebsd12 *conf12; #endif conf = NULL; error = 0; NETDUMP_WLOCK(); switch (cmd) { #ifdef COMPAT_FREEBSD11 case DIOCSKERNELDUMP_FREEBSD11: gone_in(13, "11.x ABI compatibility"); u = *(u_int *)addr; if (u != 0) { error = ENXIO; break; } if (netdump_enabled()) netdump_unconfigure(); break; #endif #ifdef COMPAT_FREEBSD12 /* * Used by dumpon(8) in 12.x for clearing previous * configuration -- then NETDUMPSCONF_FREEBSD12 is used to * actually configure netdump. */ case DIOCSKERNELDUMP_FREEBSD12: gone_in(14, "12.x ABI compatibility"); kda12 = (void *)addr; if (kda12->kda12_enable) { error = ENXIO; break; } if (netdump_enabled()) netdump_unconfigure(); break; case NETDUMPGCONF_FREEBSD12: gone_in(14, "FreeBSD 12.x ABI compat"); conf12 = (void *)addr; if (!netdump_enabled()) { error = ENXIO; break; } if (nd_conf.ndc_af != AF_INET) { error = EOPNOTSUPP; break; } if (nd_ifp != NULL) strlcpy(conf12->ndc12_iface, nd_ifp->if_xname, sizeof(conf12->ndc12_iface)); memcpy(&conf12->ndc12_server, &nd_server, sizeof(conf12->ndc12_server)); memcpy(&conf12->ndc12_client, &nd_client, sizeof(conf12->ndc12_client)); memcpy(&conf12->ndc12_gateway, &nd_gateway, sizeof(conf12->ndc12_gateway)); break; #endif case DIOCGKERNELDUMP: conf = (void *)addr; /* * For now, index is ignored; netdump doesn't support multiple * configurations (yet). */ if (!netdump_enabled()) { error = ENXIO; conf = NULL; break; } if (nd_ifp != NULL) strlcpy(conf->kda_iface, nd_ifp->if_xname, sizeof(conf->kda_iface)); memcpy(&conf->kda_server, &nd_server, sizeof(nd_server)); memcpy(&conf->kda_client, &nd_client, sizeof(nd_client)); memcpy(&conf->kda_gateway, &nd_gateway, sizeof(nd_gateway)); conf->kda_af = nd_conf.ndc_af; conf = NULL; break; #ifdef COMPAT_FREEBSD12 case NETDUMPSCONF_FREEBSD12: gone_in(14, "FreeBSD 12.x ABI compat"); conf12 = (struct netdump_conf_freebsd12 *)addr; _Static_assert(offsetof(struct diocskerneldump_arg, kda_server) == offsetof(struct netdump_conf_freebsd12, ndc12_server), "simplifying assumption"); memset(&kda_copy, 0, sizeof(kda_copy)); memcpy(&kda_copy, conf12, offsetof(struct diocskerneldump_arg, kda_server)); /* 12.x ABI could only configure IPv4 (INET) netdump. */ kda_copy.kda_af = AF_INET; memcpy(&kda_copy.kda_server.in4, &conf12->ndc12_server, sizeof(struct in_addr)); memcpy(&kda_copy.kda_client.in4, &conf12->ndc12_client, sizeof(struct in_addr)); memcpy(&kda_copy.kda_gateway.in4, &conf12->ndc12_gateway, sizeof(struct in_addr)); kda_copy.kda_index = (conf12->ndc12_kda.kda12_enable ? 0 : KDA_REMOVE_ALL); conf = &kda_copy; explicit_bzero(conf12, sizeof(*conf12)); /* FALLTHROUGH */ #endif case DIOCSKERNELDUMP: encryptedkey = NULL; if (cmd == DIOCSKERNELDUMP) { conf = (void *)addr; memcpy(&kda_copy, conf, sizeof(kda_copy)); } /* Netdump only supports IP4 at this time. */ if (conf->kda_af != AF_INET) { error = EPROTONOSUPPORT; break; } conf->kda_iface[sizeof(conf->kda_iface) - 1] = '\0'; if (conf->kda_index == KDA_REMOVE || conf->kda_index == KDA_REMOVE_DEV || conf->kda_index == KDA_REMOVE_ALL) { if (netdump_enabled()) netdump_unconfigure(); if (conf->kda_index == KDA_REMOVE_ALL) error = dumper_remove(NULL, conf); break; } error = netdump_configure(conf, td); if (error != 0) break; if (conf->kda_encryption != KERNELDUMP_ENC_NONE) { if (conf->kda_encryptedkeysize <= 0 || conf->kda_encryptedkeysize > KERNELDUMP_ENCKEY_MAX_SIZE) { error = EINVAL; break; } encryptedkey = malloc(conf->kda_encryptedkeysize, M_TEMP, M_WAITOK); error = copyin(conf->kda_encryptedkey, encryptedkey, conf->kda_encryptedkeysize); if (error != 0) { free(encryptedkey, M_TEMP); break; } conf->kda_encryptedkey = encryptedkey; } memset(&dumper, 0, sizeof(dumper)); dumper.dumper_start = netdump_start; dumper.dumper_hdr = netdump_write_headers; dumper.dumper = netdump_dumper; dumper.priv = NULL; dumper.blocksize = NETDUMP_DATASIZE; dumper.maxiosize = MAXDUMPPGS * PAGE_SIZE; dumper.mediaoffset = 0; dumper.mediasize = 0; error = dumper_insert(&dumper, conf->kda_iface, conf); - if (encryptedkey != NULL) { - explicit_bzero(encryptedkey, - conf->kda_encryptedkeysize); - free(encryptedkey, M_TEMP); - } + zfree(encryptedkey, M_TEMP); if (error != 0) netdump_unconfigure(); break; default: error = ENOTTY; break; } explicit_bzero(&kda_copy, sizeof(kda_copy)); if (conf != NULL) explicit_bzero(conf, sizeof(*conf)); NETDUMP_WUNLOCK(); return (error); } /* * Called upon system init or kld load. Initializes the netdump parameters to * sane defaults (locates the first available NIC and uses the first IPv4 IP on * that card as the client IP). Leaves the server IP unconfigured. * * Parameters: * mod, Unused. * what, The module event type. * priv, Unused. * * Returns: * int, An errno value if an error occured, 0 otherwise. */ static int netdump_modevent(module_t mod __unused, int what, void *priv __unused) { struct diocskerneldump_arg conf; char *arg; int error; error = 0; switch (what) { case MOD_LOAD: error = make_dev_p(MAKEDEV_WAITOK, &netdump_cdev, &netdump_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "netdump"); if (error != 0) return (error); nd_detach_cookie = EVENTHANDLER_REGISTER(ifnet_departure_event, netdump_ifdetach, NULL, EVENTHANDLER_PRI_ANY); if ((arg = kern_getenv("net.dump.iface")) != NULL) { strlcpy(conf.kda_iface, arg, sizeof(conf.kda_iface)); freeenv(arg); if ((arg = kern_getenv("net.dump.server")) != NULL) { inet_aton(arg, &conf.kda_server.in4); freeenv(arg); } if ((arg = kern_getenv("net.dump.client")) != NULL) { inet_aton(arg, &conf.kda_client.in4); freeenv(arg); } if ((arg = kern_getenv("net.dump.gateway")) != NULL) { inet_aton(arg, &conf.kda_gateway.in4); freeenv(arg); } conf.kda_af = AF_INET; /* Ignore errors; we print a message to the console. */ NETDUMP_WLOCK(); (void)netdump_configure(&conf, NULL); NETDUMP_WUNLOCK(); } break; case MOD_UNLOAD: NETDUMP_WLOCK(); if (netdump_enabled()) { printf("netdump: disabling dump device for unload\n"); netdump_unconfigure(); } NETDUMP_WUNLOCK(); destroy_dev(netdump_cdev); EVENTHANDLER_DEREGISTER(ifnet_departure_event, nd_detach_cookie); break; default: error = EOPNOTSUPP; break; } return (error); } static moduledata_t netdump_mod = { "netdump", netdump_modevent, NULL, }; MODULE_VERSION(netdump, 1); DECLARE_MODULE(netdump, netdump_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); #ifdef DDB /* * Usage: netdump -s [-g -i * * Order is not significant. * * Currently, this command does not support configuring encryption or * compression. */ DB_FUNC(netdump, db_netdump_cmd, db_cmd_table, CS_OWN, NULL) { static struct diocskerneldump_arg conf; static char blockbuf[NETDUMP_DATASIZE]; static union { struct dumperinfo di; /* For valid di_devname. */ char di_buf[sizeof(struct dumperinfo) + 1]; } u; struct debugnet_ddb_config params; int error; error = debugnet_parse_ddb_cmd("netdump", ¶ms); if (error != 0) { db_printf("Error configuring netdump: %d\n", error); return; } /* Translate to a netdump dumper config. */ memset(&conf, 0, sizeof(conf)); if (params.dd_ifp != NULL) strlcpy(conf.kda_iface, if_name(params.dd_ifp), sizeof(conf.kda_iface)); conf.kda_af = AF_INET; conf.kda_server.in4 = (struct in_addr) { params.dd_server }; if (params.dd_has_client) conf.kda_client.in4 = (struct in_addr) { params.dd_client }; else conf.kda_client.in4 = (struct in_addr) { INADDR_ANY }; if (params.dd_has_gateway) conf.kda_gateway.in4 = (struct in_addr) { params.dd_gateway }; else conf.kda_gateway.in4 = (struct in_addr) { INADDR_ANY }; /* Set the global netdump config to these options. */ error = netdump_configure(&conf, NULL); if (error != 0) { db_printf("Error enabling netdump: %d\n", error); return; } /* Fake the generic dump configuration list entry to avoid malloc. */ memset(&u.di_buf, 0, sizeof(u.di_buf)); u.di.dumper_start = netdump_start; u.di.dumper_hdr = netdump_write_headers; u.di.dumper = netdump_dumper; u.di.priv = NULL; u.di.blocksize = NETDUMP_DATASIZE; u.di.maxiosize = MAXDUMPPGS * PAGE_SIZE; u.di.mediaoffset = 0; u.di.mediasize = 0; u.di.blockbuf = blockbuf; dumper_ddb_insert(&u.di); error = doadump(false); dumper_ddb_remove(&u.di); if (error != 0) db_printf("Cannot dump: %d\n", error); } #endif /* DDB */ Index: head/sys/opencrypto/crypto.c =================================================================== --- head/sys/opencrypto/crypto.c (revision 362623) +++ head/sys/opencrypto/crypto.c (revision 362624) @@ -1,2316 +1,2315 @@ /*- * Copyright (c) 2002-2006 Sam Leffler. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Cryptographic Subsystem. * * This code is derived from the Openbsd Cryptographic Framework (OCF) * that has the copyright shown below. Very little of the original * code remains. */ /*- * The author of this code is Angelos D. Keromytis (angelos@cis.upenn.edu) * * This code was written by Angelos D. Keromytis in Athens, Greece, in * February 2000. Network Security Technologies Inc. (NSTI) kindly * supported the development of this code. * * Copyright (c) 2000, 2001 Angelos D. Keromytis * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in * all source code copies of any software which is or includes a copy or * modification of this software. * * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR * PURPOSE. */ #define CRYPTO_TIMING /* enable timing support */ #include "opt_compat.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cryptodev_if.h" #if defined(__i386__) || defined(__amd64__) || defined(__aarch64__) #include #endif SDT_PROVIDER_DEFINE(opencrypto); /* * Crypto drivers register themselves by allocating a slot in the * crypto_drivers table with crypto_get_driverid() and then registering * each asym algorithm they support with crypto_kregister(). */ static struct mtx crypto_drivers_mtx; /* lock on driver table */ #define CRYPTO_DRIVER_LOCK() mtx_lock(&crypto_drivers_mtx) #define CRYPTO_DRIVER_UNLOCK() mtx_unlock(&crypto_drivers_mtx) #define CRYPTO_DRIVER_ASSERT() mtx_assert(&crypto_drivers_mtx, MA_OWNED) /* * Crypto device/driver capabilities structure. * * Synchronization: * (d) - protected by CRYPTO_DRIVER_LOCK() * (q) - protected by CRYPTO_Q_LOCK() * Not tagged fields are read-only. */ struct cryptocap { device_t cc_dev; uint32_t cc_hid; u_int32_t cc_sessions; /* (d) # of sessions */ u_int32_t cc_koperations; /* (d) # os asym operations */ u_int8_t cc_kalg[CRK_ALGORITHM_MAX + 1]; int cc_flags; /* (d) flags */ #define CRYPTOCAP_F_CLEANUP 0x80000000 /* needs resource cleanup */ int cc_qblocked; /* (q) symmetric q blocked */ int cc_kqblocked; /* (q) asymmetric q blocked */ size_t cc_session_size; volatile int cc_refs; }; static struct cryptocap **crypto_drivers = NULL; static int crypto_drivers_size = 0; struct crypto_session { struct cryptocap *cap; void *softc; struct crypto_session_params csp; }; /* * There are two queues for crypto requests; one for symmetric (e.g. * cipher) operations and one for asymmetric (e.g. MOD)operations. * A single mutex is used to lock access to both queues. We could * have one per-queue but having one simplifies handling of block/unblock * operations. */ static int crp_sleep = 0; static TAILQ_HEAD(cryptop_q ,cryptop) crp_q; /* request queues */ static TAILQ_HEAD(,cryptkop) crp_kq; static struct mtx crypto_q_mtx; #define CRYPTO_Q_LOCK() mtx_lock(&crypto_q_mtx) #define CRYPTO_Q_UNLOCK() mtx_unlock(&crypto_q_mtx) SYSCTL_NODE(_kern, OID_AUTO, crypto, CTLFLAG_RW, 0, "In-kernel cryptography"); /* * Taskqueue used to dispatch the crypto requests * that have the CRYPTO_F_ASYNC flag */ static struct taskqueue *crypto_tq; /* * Crypto seq numbers are operated on with modular arithmetic */ #define CRYPTO_SEQ_GT(a,b) ((int)((a)-(b)) > 0) struct crypto_ret_worker { struct mtx crypto_ret_mtx; TAILQ_HEAD(,cryptop) crp_ordered_ret_q; /* ordered callback queue for symetric jobs */ TAILQ_HEAD(,cryptop) crp_ret_q; /* callback queue for symetric jobs */ TAILQ_HEAD(,cryptkop) crp_ret_kq; /* callback queue for asym jobs */ u_int32_t reorder_ops; /* total ordered sym jobs received */ u_int32_t reorder_cur_seq; /* current sym job dispatched */ struct proc *cryptoretproc; }; static struct crypto_ret_worker *crypto_ret_workers = NULL; #define CRYPTO_RETW(i) (&crypto_ret_workers[i]) #define CRYPTO_RETW_ID(w) ((w) - crypto_ret_workers) #define FOREACH_CRYPTO_RETW(w) \ for (w = crypto_ret_workers; w < crypto_ret_workers + crypto_workers_num; ++w) #define CRYPTO_RETW_LOCK(w) mtx_lock(&w->crypto_ret_mtx) #define CRYPTO_RETW_UNLOCK(w) mtx_unlock(&w->crypto_ret_mtx) #define CRYPTO_RETW_EMPTY(w) \ (TAILQ_EMPTY(&w->crp_ret_q) && TAILQ_EMPTY(&w->crp_ret_kq) && TAILQ_EMPTY(&w->crp_ordered_ret_q)) static int crypto_workers_num = 0; SYSCTL_INT(_kern_crypto, OID_AUTO, num_workers, CTLFLAG_RDTUN, &crypto_workers_num, 0, "Number of crypto workers used to dispatch crypto jobs"); #ifdef COMPAT_FREEBSD12 SYSCTL_INT(_kern, OID_AUTO, crypto_workers_num, CTLFLAG_RDTUN, &crypto_workers_num, 0, "Number of crypto workers used to dispatch crypto jobs"); #endif static uma_zone_t cryptop_zone; static uma_zone_t cryptoses_zone; int crypto_userasymcrypto = 1; SYSCTL_INT(_kern_crypto, OID_AUTO, asym_enable, CTLFLAG_RW, &crypto_userasymcrypto, 0, "Enable user-mode access to asymmetric crypto support"); #ifdef COMPAT_FREEBSD12 SYSCTL_INT(_kern, OID_AUTO, userasymcrypto, CTLFLAG_RW, &crypto_userasymcrypto, 0, "Enable/disable user-mode access to asymmetric crypto support"); #endif int crypto_devallowsoft = 0; SYSCTL_INT(_kern_crypto, OID_AUTO, allow_soft, CTLFLAG_RW, &crypto_devallowsoft, 0, "Enable use of software crypto by /dev/crypto"); #ifdef COMPAT_FREEBSD12 SYSCTL_INT(_kern, OID_AUTO, cryptodevallowsoft, CTLFLAG_RW, &crypto_devallowsoft, 0, "Enable/disable use of software crypto by /dev/crypto"); #endif MALLOC_DEFINE(M_CRYPTO_DATA, "crypto", "crypto session records"); static void crypto_proc(void); static struct proc *cryptoproc; static void crypto_ret_proc(struct crypto_ret_worker *ret_worker); static void crypto_destroy(void); static int crypto_invoke(struct cryptocap *cap, struct cryptop *crp, int hint); static int crypto_kinvoke(struct cryptkop *krp); static void crypto_task_invoke(void *ctx, int pending); static void crypto_batch_enqueue(struct cryptop *crp); static struct cryptostats cryptostats; SYSCTL_STRUCT(_kern_crypto, OID_AUTO, stats, CTLFLAG_RW, &cryptostats, cryptostats, "Crypto system statistics"); #ifdef CRYPTO_TIMING static int crypto_timing = 0; SYSCTL_INT(_debug, OID_AUTO, crypto_timing, CTLFLAG_RW, &crypto_timing, 0, "Enable/disable crypto timing support"); #endif /* Try to avoid directly exposing the key buffer as a symbol */ static struct keybuf *keybuf; static struct keybuf empty_keybuf = { .kb_nents = 0 }; /* Obtain the key buffer from boot metadata */ static void keybuf_init(void) { caddr_t kmdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); keybuf = (struct keybuf *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_KEYBUF); if (keybuf == NULL) keybuf = &empty_keybuf; } /* It'd be nice if we could store these in some kind of secure memory... */ struct keybuf * get_keybuf(void) { return (keybuf); } static struct cryptocap * cap_ref(struct cryptocap *cap) { refcount_acquire(&cap->cc_refs); return (cap); } static void cap_rele(struct cryptocap *cap) { if (refcount_release(&cap->cc_refs) == 0) return; KASSERT(cap->cc_sessions == 0, ("freeing crypto driver with active sessions")); KASSERT(cap->cc_koperations == 0, ("freeing crypto driver with active key operations")); free(cap, M_CRYPTO_DATA); } static int crypto_init(void) { struct crypto_ret_worker *ret_worker; int error; mtx_init(&crypto_drivers_mtx, "crypto", "crypto driver table", MTX_DEF|MTX_QUIET); TAILQ_INIT(&crp_q); TAILQ_INIT(&crp_kq); mtx_init(&crypto_q_mtx, "crypto", "crypto op queues", MTX_DEF); cryptop_zone = uma_zcreate("cryptop", sizeof (struct cryptop), 0, 0, 0, 0, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); cryptoses_zone = uma_zcreate("crypto_session", sizeof(struct crypto_session), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); if (cryptop_zone == NULL || cryptoses_zone == NULL) { printf("crypto_init: cannot setup crypto zones\n"); error = ENOMEM; goto bad; } crypto_drivers_size = CRYPTO_DRIVERS_INITIAL; crypto_drivers = malloc(crypto_drivers_size * sizeof(struct cryptocap), M_CRYPTO_DATA, M_NOWAIT | M_ZERO); if (crypto_drivers == NULL) { printf("crypto_init: cannot setup crypto drivers\n"); error = ENOMEM; goto bad; } if (crypto_workers_num < 1 || crypto_workers_num > mp_ncpus) crypto_workers_num = mp_ncpus; crypto_tq = taskqueue_create("crypto", M_WAITOK|M_ZERO, taskqueue_thread_enqueue, &crypto_tq); if (crypto_tq == NULL) { printf("crypto init: cannot setup crypto taskqueue\n"); error = ENOMEM; goto bad; } taskqueue_start_threads(&crypto_tq, crypto_workers_num, PRI_MIN_KERN, "crypto"); error = kproc_create((void (*)(void *)) crypto_proc, NULL, &cryptoproc, 0, 0, "crypto"); if (error) { printf("crypto_init: cannot start crypto thread; error %d", error); goto bad; } crypto_ret_workers = malloc(crypto_workers_num * sizeof(struct crypto_ret_worker), M_CRYPTO_DATA, M_NOWAIT|M_ZERO); if (crypto_ret_workers == NULL) { error = ENOMEM; printf("crypto_init: cannot allocate ret workers\n"); goto bad; } FOREACH_CRYPTO_RETW(ret_worker) { TAILQ_INIT(&ret_worker->crp_ordered_ret_q); TAILQ_INIT(&ret_worker->crp_ret_q); TAILQ_INIT(&ret_worker->crp_ret_kq); ret_worker->reorder_ops = 0; ret_worker->reorder_cur_seq = 0; mtx_init(&ret_worker->crypto_ret_mtx, "crypto", "crypto return queues", MTX_DEF); error = kproc_create((void (*)(void *)) crypto_ret_proc, ret_worker, &ret_worker->cryptoretproc, 0, 0, "crypto returns %td", CRYPTO_RETW_ID(ret_worker)); if (error) { printf("crypto_init: cannot start cryptoret thread; error %d", error); goto bad; } } keybuf_init(); return 0; bad: crypto_destroy(); return error; } /* * Signal a crypto thread to terminate. We use the driver * table lock to synchronize the sleep/wakeups so that we * are sure the threads have terminated before we release * the data structures they use. See crypto_finis below * for the other half of this song-and-dance. */ static void crypto_terminate(struct proc **pp, void *q) { struct proc *p; mtx_assert(&crypto_drivers_mtx, MA_OWNED); p = *pp; *pp = NULL; if (p) { wakeup_one(q); PROC_LOCK(p); /* NB: insure we don't miss wakeup */ CRYPTO_DRIVER_UNLOCK(); /* let crypto_finis progress */ msleep(p, &p->p_mtx, PWAIT, "crypto_destroy", 0); PROC_UNLOCK(p); CRYPTO_DRIVER_LOCK(); } } static void hmac_init_pad(struct auth_hash *axf, const char *key, int klen, void *auth_ctx, uint8_t padval) { uint8_t hmac_key[HMAC_MAX_BLOCK_LEN]; u_int i; KASSERT(axf->blocksize <= sizeof(hmac_key), ("Invalid HMAC block size %d", axf->blocksize)); /* * If the key is larger than the block size, use the digest of * the key as the key instead. */ memset(hmac_key, 0, sizeof(hmac_key)); if (klen > axf->blocksize) { axf->Init(auth_ctx); axf->Update(auth_ctx, key, klen); axf->Final(hmac_key, auth_ctx); klen = axf->hashsize; } else memcpy(hmac_key, key, klen); for (i = 0; i < axf->blocksize; i++) hmac_key[i] ^= padval; axf->Init(auth_ctx); axf->Update(auth_ctx, hmac_key, axf->blocksize); } void hmac_init_ipad(struct auth_hash *axf, const char *key, int klen, void *auth_ctx) { hmac_init_pad(axf, key, klen, auth_ctx, HMAC_IPAD_VAL); } void hmac_init_opad(struct auth_hash *axf, const char *key, int klen, void *auth_ctx) { hmac_init_pad(axf, key, klen, auth_ctx, HMAC_OPAD_VAL); } static void crypto_destroy(void) { struct crypto_ret_worker *ret_worker; int i; /* * Terminate any crypto threads. */ if (crypto_tq != NULL) taskqueue_drain_all(crypto_tq); CRYPTO_DRIVER_LOCK(); crypto_terminate(&cryptoproc, &crp_q); FOREACH_CRYPTO_RETW(ret_worker) crypto_terminate(&ret_worker->cryptoretproc, &ret_worker->crp_ret_q); CRYPTO_DRIVER_UNLOCK(); /* XXX flush queues??? */ /* * Reclaim dynamically allocated resources. */ for (i = 0; i < crypto_drivers_size; i++) { if (crypto_drivers[i] != NULL) cap_rele(crypto_drivers[i]); } free(crypto_drivers, M_CRYPTO_DATA); if (cryptoses_zone != NULL) uma_zdestroy(cryptoses_zone); if (cryptop_zone != NULL) uma_zdestroy(cryptop_zone); mtx_destroy(&crypto_q_mtx); FOREACH_CRYPTO_RETW(ret_worker) mtx_destroy(&ret_worker->crypto_ret_mtx); free(crypto_ret_workers, M_CRYPTO_DATA); if (crypto_tq != NULL) taskqueue_free(crypto_tq); mtx_destroy(&crypto_drivers_mtx); } uint32_t crypto_ses2hid(crypto_session_t crypto_session) { return (crypto_session->cap->cc_hid); } uint32_t crypto_ses2caps(crypto_session_t crypto_session) { return (crypto_session->cap->cc_flags & 0xff000000); } void * crypto_get_driver_session(crypto_session_t crypto_session) { return (crypto_session->softc); } const struct crypto_session_params * crypto_get_params(crypto_session_t crypto_session) { return (&crypto_session->csp); } struct auth_hash * crypto_auth_hash(const struct crypto_session_params *csp) { switch (csp->csp_auth_alg) { case CRYPTO_SHA1_HMAC: return (&auth_hash_hmac_sha1); case CRYPTO_SHA2_224_HMAC: return (&auth_hash_hmac_sha2_224); case CRYPTO_SHA2_256_HMAC: return (&auth_hash_hmac_sha2_256); case CRYPTO_SHA2_384_HMAC: return (&auth_hash_hmac_sha2_384); case CRYPTO_SHA2_512_HMAC: return (&auth_hash_hmac_sha2_512); case CRYPTO_NULL_HMAC: return (&auth_hash_null); case CRYPTO_RIPEMD160_HMAC: return (&auth_hash_hmac_ripemd_160); case CRYPTO_SHA1: return (&auth_hash_sha1); case CRYPTO_SHA2_224: return (&auth_hash_sha2_224); case CRYPTO_SHA2_256: return (&auth_hash_sha2_256); case CRYPTO_SHA2_384: return (&auth_hash_sha2_384); case CRYPTO_SHA2_512: return (&auth_hash_sha2_512); case CRYPTO_AES_NIST_GMAC: switch (csp->csp_auth_klen) { case 128 / 8: return (&auth_hash_nist_gmac_aes_128); case 192 / 8: return (&auth_hash_nist_gmac_aes_192); case 256 / 8: return (&auth_hash_nist_gmac_aes_256); default: return (NULL); } case CRYPTO_BLAKE2B: return (&auth_hash_blake2b); case CRYPTO_BLAKE2S: return (&auth_hash_blake2s); case CRYPTO_POLY1305: return (&auth_hash_poly1305); case CRYPTO_AES_CCM_CBC_MAC: switch (csp->csp_auth_klen) { case 128 / 8: return (&auth_hash_ccm_cbc_mac_128); case 192 / 8: return (&auth_hash_ccm_cbc_mac_192); case 256 / 8: return (&auth_hash_ccm_cbc_mac_256); default: return (NULL); } default: return (NULL); } } struct enc_xform * crypto_cipher(const struct crypto_session_params *csp) { switch (csp->csp_cipher_alg) { case CRYPTO_RIJNDAEL128_CBC: return (&enc_xform_rijndael128); case CRYPTO_AES_XTS: return (&enc_xform_aes_xts); case CRYPTO_AES_ICM: return (&enc_xform_aes_icm); case CRYPTO_AES_NIST_GCM_16: return (&enc_xform_aes_nist_gcm); case CRYPTO_CAMELLIA_CBC: return (&enc_xform_camellia); case CRYPTO_NULL_CBC: return (&enc_xform_null); case CRYPTO_CHACHA20: return (&enc_xform_chacha20); case CRYPTO_AES_CCM_16: return (&enc_xform_ccm); default: return (NULL); } } static struct cryptocap * crypto_checkdriver(u_int32_t hid) { return (hid >= crypto_drivers_size ? NULL : crypto_drivers[hid]); } /* * Select a driver for a new session that supports the specified * algorithms and, optionally, is constrained according to the flags. */ static struct cryptocap * crypto_select_driver(const struct crypto_session_params *csp, int flags) { struct cryptocap *cap, *best; int best_match, error, hid; CRYPTO_DRIVER_ASSERT(); best = NULL; for (hid = 0; hid < crypto_drivers_size; hid++) { /* * If there is no driver for this slot, or the driver * is not appropriate (hardware or software based on * match), then skip. */ cap = crypto_drivers[hid]; if (cap == NULL || (cap->cc_flags & flags) == 0) continue; error = CRYPTODEV_PROBESESSION(cap->cc_dev, csp); if (error >= 0) continue; /* * Use the driver with the highest probe value. * Hardware drivers use a higher probe value than * software. In case of a tie, prefer the driver with * the fewest active sessions. */ if (best == NULL || error > best_match || (error == best_match && cap->cc_sessions < best->cc_sessions)) { best = cap; best_match = error; } } return best; } static enum alg_type { ALG_NONE = 0, ALG_CIPHER, ALG_DIGEST, ALG_KEYED_DIGEST, ALG_COMPRESSION, ALG_AEAD } alg_types[] = { [CRYPTO_SHA1_HMAC] = ALG_KEYED_DIGEST, [CRYPTO_RIPEMD160_HMAC] = ALG_KEYED_DIGEST, [CRYPTO_AES_CBC] = ALG_CIPHER, [CRYPTO_SHA1] = ALG_DIGEST, [CRYPTO_NULL_HMAC] = ALG_DIGEST, [CRYPTO_NULL_CBC] = ALG_CIPHER, [CRYPTO_DEFLATE_COMP] = ALG_COMPRESSION, [CRYPTO_SHA2_256_HMAC] = ALG_KEYED_DIGEST, [CRYPTO_SHA2_384_HMAC] = ALG_KEYED_DIGEST, [CRYPTO_SHA2_512_HMAC] = ALG_KEYED_DIGEST, [CRYPTO_CAMELLIA_CBC] = ALG_CIPHER, [CRYPTO_AES_XTS] = ALG_CIPHER, [CRYPTO_AES_ICM] = ALG_CIPHER, [CRYPTO_AES_NIST_GMAC] = ALG_KEYED_DIGEST, [CRYPTO_AES_NIST_GCM_16] = ALG_AEAD, [CRYPTO_BLAKE2B] = ALG_KEYED_DIGEST, [CRYPTO_BLAKE2S] = ALG_KEYED_DIGEST, [CRYPTO_CHACHA20] = ALG_CIPHER, [CRYPTO_SHA2_224_HMAC] = ALG_KEYED_DIGEST, [CRYPTO_RIPEMD160] = ALG_DIGEST, [CRYPTO_SHA2_224] = ALG_DIGEST, [CRYPTO_SHA2_256] = ALG_DIGEST, [CRYPTO_SHA2_384] = ALG_DIGEST, [CRYPTO_SHA2_512] = ALG_DIGEST, [CRYPTO_POLY1305] = ALG_KEYED_DIGEST, [CRYPTO_AES_CCM_CBC_MAC] = ALG_KEYED_DIGEST, [CRYPTO_AES_CCM_16] = ALG_AEAD, }; static enum alg_type alg_type(int alg) { if (alg < nitems(alg_types)) return (alg_types[alg]); return (ALG_NONE); } static bool alg_is_compression(int alg) { return (alg_type(alg) == ALG_COMPRESSION); } static bool alg_is_cipher(int alg) { return (alg_type(alg) == ALG_CIPHER); } static bool alg_is_digest(int alg) { return (alg_type(alg) == ALG_DIGEST || alg_type(alg) == ALG_KEYED_DIGEST); } static bool alg_is_keyed_digest(int alg) { return (alg_type(alg) == ALG_KEYED_DIGEST); } static bool alg_is_aead(int alg) { return (alg_type(alg) == ALG_AEAD); } /* Various sanity checks on crypto session parameters. */ static bool check_csp(const struct crypto_session_params *csp) { struct auth_hash *axf; /* Mode-independent checks. */ if ((csp->csp_flags & ~(CSP_F_SEPARATE_OUTPUT | CSP_F_SEPARATE_AAD)) != 0) return (false); if (csp->csp_ivlen < 0 || csp->csp_cipher_klen < 0 || csp->csp_auth_klen < 0 || csp->csp_auth_mlen < 0) return (false); if (csp->csp_auth_key != NULL && csp->csp_auth_klen == 0) return (false); if (csp->csp_cipher_key != NULL && csp->csp_cipher_klen == 0) return (false); switch (csp->csp_mode) { case CSP_MODE_COMPRESS: if (!alg_is_compression(csp->csp_cipher_alg)) return (false); if (csp->csp_flags & CSP_F_SEPARATE_OUTPUT) return (false); if (csp->csp_flags & CSP_F_SEPARATE_AAD) return (false); if (csp->csp_cipher_klen != 0 || csp->csp_ivlen != 0 || csp->csp_auth_alg != 0 || csp->csp_auth_klen != 0 || csp->csp_auth_mlen != 0) return (false); break; case CSP_MODE_CIPHER: if (!alg_is_cipher(csp->csp_cipher_alg)) return (false); if (csp->csp_flags & CSP_F_SEPARATE_AAD) return (false); if (csp->csp_cipher_alg != CRYPTO_NULL_CBC) { if (csp->csp_cipher_klen == 0) return (false); if (csp->csp_ivlen == 0) return (false); } if (csp->csp_ivlen >= EALG_MAX_BLOCK_LEN) return (false); if (csp->csp_auth_alg != 0 || csp->csp_auth_klen != 0 || csp->csp_auth_mlen != 0) return (false); break; case CSP_MODE_DIGEST: if (csp->csp_cipher_alg != 0 || csp->csp_cipher_klen != 0) return (false); if (csp->csp_flags & CSP_F_SEPARATE_AAD) return (false); /* IV is optional for digests (e.g. GMAC). */ if (csp->csp_ivlen >= EALG_MAX_BLOCK_LEN) return (false); if (!alg_is_digest(csp->csp_auth_alg)) return (false); /* Key is optional for BLAKE2 digests. */ if (csp->csp_auth_alg == CRYPTO_BLAKE2B || csp->csp_auth_alg == CRYPTO_BLAKE2S) ; else if (alg_is_keyed_digest(csp->csp_auth_alg)) { if (csp->csp_auth_klen == 0) return (false); } else { if (csp->csp_auth_klen != 0) return (false); } if (csp->csp_auth_mlen != 0) { axf = crypto_auth_hash(csp); if (axf == NULL || csp->csp_auth_mlen > axf->hashsize) return (false); } break; case CSP_MODE_AEAD: if (!alg_is_aead(csp->csp_cipher_alg)) return (false); if (csp->csp_cipher_klen == 0) return (false); if (csp->csp_ivlen == 0 || csp->csp_ivlen >= EALG_MAX_BLOCK_LEN) return (false); if (csp->csp_auth_alg != 0 || csp->csp_auth_klen != 0) return (false); /* * XXX: Would be nice to have a better way to get this * value. */ switch (csp->csp_cipher_alg) { case CRYPTO_AES_NIST_GCM_16: case CRYPTO_AES_CCM_16: if (csp->csp_auth_mlen > 16) return (false); break; } break; case CSP_MODE_ETA: if (!alg_is_cipher(csp->csp_cipher_alg)) return (false); if (csp->csp_cipher_alg != CRYPTO_NULL_CBC) { if (csp->csp_cipher_klen == 0) return (false); if (csp->csp_ivlen == 0) return (false); } if (csp->csp_ivlen >= EALG_MAX_BLOCK_LEN) return (false); if (!alg_is_digest(csp->csp_auth_alg)) return (false); /* Key is optional for BLAKE2 digests. */ if (csp->csp_auth_alg == CRYPTO_BLAKE2B || csp->csp_auth_alg == CRYPTO_BLAKE2S) ; else if (alg_is_keyed_digest(csp->csp_auth_alg)) { if (csp->csp_auth_klen == 0) return (false); } else { if (csp->csp_auth_klen != 0) return (false); } if (csp->csp_auth_mlen != 0) { axf = crypto_auth_hash(csp); if (axf == NULL || csp->csp_auth_mlen > axf->hashsize) return (false); } break; default: return (false); } return (true); } /* * Delete a session after it has been detached from its driver. */ static void crypto_deletesession(crypto_session_t cses) { struct cryptocap *cap; cap = cses->cap; - explicit_bzero(cses->softc, cap->cc_session_size); - free(cses->softc, M_CRYPTO_DATA); + zfree(cses->softc, M_CRYPTO_DATA); uma_zfree(cryptoses_zone, cses); CRYPTO_DRIVER_LOCK(); cap->cc_sessions--; if (cap->cc_sessions == 0 && cap->cc_flags & CRYPTOCAP_F_CLEANUP) wakeup(cap); CRYPTO_DRIVER_UNLOCK(); cap_rele(cap); } /* * Create a new session. The crid argument specifies a crypto * driver to use or constraints on a driver to select (hardware * only, software only, either). Whatever driver is selected * must be capable of the requested crypto algorithms. */ int crypto_newsession(crypto_session_t *cses, const struct crypto_session_params *csp, int crid) { crypto_session_t res; struct cryptocap *cap; int err; if (!check_csp(csp)) return (EINVAL); res = NULL; CRYPTO_DRIVER_LOCK(); if ((crid & (CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE)) == 0) { /* * Use specified driver; verify it is capable. */ cap = crypto_checkdriver(crid); if (cap != NULL && CRYPTODEV_PROBESESSION(cap->cc_dev, csp) > 0) cap = NULL; } else { /* * No requested driver; select based on crid flags. */ cap = crypto_select_driver(csp, crid); } if (cap == NULL) { CRYPTO_DRIVER_UNLOCK(); CRYPTDEB("no driver"); return (EOPNOTSUPP); } cap_ref(cap); cap->cc_sessions++; CRYPTO_DRIVER_UNLOCK(); res = uma_zalloc(cryptoses_zone, M_WAITOK | M_ZERO); res->cap = cap; res->softc = malloc(cap->cc_session_size, M_CRYPTO_DATA, M_WAITOK | M_ZERO); res->csp = *csp; /* Call the driver initialization routine. */ err = CRYPTODEV_NEWSESSION(cap->cc_dev, res, csp); if (err != 0) { CRYPTDEB("dev newsession failed: %d", err); crypto_deletesession(res); return (err); } *cses = res; return (0); } /* * Delete an existing session (or a reserved session on an unregistered * driver). */ void crypto_freesession(crypto_session_t cses) { struct cryptocap *cap; if (cses == NULL) return; cap = cses->cap; /* Call the driver cleanup routine, if available. */ CRYPTODEV_FREESESSION(cap->cc_dev, cses); crypto_deletesession(cses); } /* * Return a new driver id. Registers a driver with the system so that * it can be probed by subsequent sessions. */ int32_t crypto_get_driverid(device_t dev, size_t sessionsize, int flags) { struct cryptocap *cap, **newdrv; int i; if ((flags & (CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE)) == 0) { device_printf(dev, "no flags specified when registering driver\n"); return -1; } cap = malloc(sizeof(*cap), M_CRYPTO_DATA, M_WAITOK | M_ZERO); cap->cc_dev = dev; cap->cc_session_size = sessionsize; cap->cc_flags = flags; refcount_init(&cap->cc_refs, 1); CRYPTO_DRIVER_LOCK(); for (;;) { for (i = 0; i < crypto_drivers_size; i++) { if (crypto_drivers[i] == NULL) break; } if (i < crypto_drivers_size) break; /* Out of entries, allocate some more. */ if (2 * crypto_drivers_size <= crypto_drivers_size) { CRYPTO_DRIVER_UNLOCK(); printf("crypto: driver count wraparound!\n"); cap_rele(cap); return (-1); } CRYPTO_DRIVER_UNLOCK(); newdrv = malloc(2 * crypto_drivers_size * sizeof(*crypto_drivers), M_CRYPTO_DATA, M_WAITOK | M_ZERO); CRYPTO_DRIVER_LOCK(); memcpy(newdrv, crypto_drivers, crypto_drivers_size * sizeof(*crypto_drivers)); crypto_drivers_size *= 2; free(crypto_drivers, M_CRYPTO_DATA); crypto_drivers = newdrv; } cap->cc_hid = i; crypto_drivers[i] = cap; CRYPTO_DRIVER_UNLOCK(); if (bootverbose) printf("crypto: assign %s driver id %u, flags 0x%x\n", device_get_nameunit(dev), i, flags); return i; } /* * Lookup a driver by name. We match against the full device * name and unit, and against just the name. The latter gives * us a simple widlcarding by device name. On success return the * driver/hardware identifier; otherwise return -1. */ int crypto_find_driver(const char *match) { struct cryptocap *cap; int i, len = strlen(match); CRYPTO_DRIVER_LOCK(); for (i = 0; i < crypto_drivers_size; i++) { if (crypto_drivers[i] == NULL) continue; cap = crypto_drivers[i]; if (strncmp(match, device_get_nameunit(cap->cc_dev), len) == 0 || strncmp(match, device_get_name(cap->cc_dev), len) == 0) { CRYPTO_DRIVER_UNLOCK(); return (i); } } CRYPTO_DRIVER_UNLOCK(); return (-1); } /* * Return the device_t for the specified driver or NULL * if the driver identifier is invalid. */ device_t crypto_find_device_byhid(int hid) { struct cryptocap *cap; device_t dev; dev = NULL; CRYPTO_DRIVER_LOCK(); cap = crypto_checkdriver(hid); if (cap != NULL) dev = cap->cc_dev; CRYPTO_DRIVER_UNLOCK(); return (dev); } /* * Return the device/driver capabilities. */ int crypto_getcaps(int hid) { struct cryptocap *cap; int flags; flags = 0; CRYPTO_DRIVER_LOCK(); cap = crypto_checkdriver(hid); if (cap != NULL) flags = cap->cc_flags; CRYPTO_DRIVER_UNLOCK(); return (flags); } /* * Register support for a key-related algorithm. This routine * is called once for each algorithm supported a driver. */ int crypto_kregister(u_int32_t driverid, int kalg, u_int32_t flags) { struct cryptocap *cap; int err; CRYPTO_DRIVER_LOCK(); cap = crypto_checkdriver(driverid); if (cap != NULL && (CRK_ALGORITM_MIN <= kalg && kalg <= CRK_ALGORITHM_MAX)) { /* * XXX Do some performance testing to determine placing. * XXX We probably need an auxiliary data structure that * XXX describes relative performances. */ cap->cc_kalg[kalg] = flags | CRYPTO_ALG_FLAG_SUPPORTED; if (bootverbose) printf("crypto: %s registers key alg %u flags %u\n" , device_get_nameunit(cap->cc_dev) , kalg , flags ); err = 0; } else err = EINVAL; CRYPTO_DRIVER_UNLOCK(); return err; } /* * Unregister all algorithms associated with a crypto driver. * If there are pending sessions using it, leave enough information * around so that subsequent calls using those sessions will * correctly detect the driver has been unregistered and reroute * requests. */ int crypto_unregister_all(u_int32_t driverid) { struct cryptocap *cap; CRYPTO_DRIVER_LOCK(); cap = crypto_checkdriver(driverid); if (cap == NULL) { CRYPTO_DRIVER_UNLOCK(); return (EINVAL); } cap->cc_flags |= CRYPTOCAP_F_CLEANUP; crypto_drivers[driverid] = NULL; /* * XXX: This doesn't do anything to kick sessions that * have no pending operations. */ while (cap->cc_sessions != 0 || cap->cc_koperations != 0) mtx_sleep(cap, &crypto_drivers_mtx, 0, "cryunreg", 0); CRYPTO_DRIVER_UNLOCK(); cap_rele(cap); return (0); } /* * Clear blockage on a driver. The what parameter indicates whether * the driver is now ready for cryptop's and/or cryptokop's. */ int crypto_unblock(u_int32_t driverid, int what) { struct cryptocap *cap; int err; CRYPTO_Q_LOCK(); cap = crypto_checkdriver(driverid); if (cap != NULL) { if (what & CRYPTO_SYMQ) cap->cc_qblocked = 0; if (what & CRYPTO_ASYMQ) cap->cc_kqblocked = 0; if (crp_sleep) wakeup_one(&crp_q); err = 0; } else err = EINVAL; CRYPTO_Q_UNLOCK(); return err; } size_t crypto_buffer_len(struct crypto_buffer *cb) { switch (cb->cb_type) { case CRYPTO_BUF_CONTIG: return (cb->cb_buf_len); case CRYPTO_BUF_MBUF: if (cb->cb_mbuf->m_flags & M_PKTHDR) return (cb->cb_mbuf->m_pkthdr.len); return (m_length(cb->cb_mbuf, NULL)); case CRYPTO_BUF_UIO: return (cb->cb_uio->uio_resid); default: return (0); } } #ifdef INVARIANTS /* Various sanity checks on crypto requests. */ static void cb_sanity(struct crypto_buffer *cb, const char *name) { KASSERT(cb->cb_type > CRYPTO_BUF_NONE && cb->cb_type <= CRYPTO_BUF_LAST, ("incoming crp with invalid %s buffer type", name)); if (cb->cb_type == CRYPTO_BUF_CONTIG) KASSERT(cb->cb_buf_len >= 0, ("incoming crp with -ve %s buffer length", name)); } static void crp_sanity(struct cryptop *crp) { struct crypto_session_params *csp; struct crypto_buffer *out; size_t ilen, len, olen; KASSERT(crp->crp_session != NULL, ("incoming crp without a session")); KASSERT(crp->crp_obuf.cb_type >= CRYPTO_BUF_NONE && crp->crp_obuf.cb_type <= CRYPTO_BUF_LAST, ("incoming crp with invalid output buffer type")); KASSERT(crp->crp_etype == 0, ("incoming crp with error")); KASSERT(!(crp->crp_flags & CRYPTO_F_DONE), ("incoming crp already done")); csp = &crp->crp_session->csp; cb_sanity(&crp->crp_buf, "input"); ilen = crypto_buffer_len(&crp->crp_buf); olen = ilen; out = NULL; if (csp->csp_flags & CSP_F_SEPARATE_OUTPUT) { if (crp->crp_obuf.cb_type != CRYPTO_BUF_NONE) { cb_sanity(&crp->crp_obuf, "output"); out = &crp->crp_obuf; olen = crypto_buffer_len(out); } } else KASSERT(crp->crp_obuf.cb_type == CRYPTO_BUF_NONE, ("incoming crp with separate output buffer " "but no session support")); switch (csp->csp_mode) { case CSP_MODE_COMPRESS: KASSERT(crp->crp_op == CRYPTO_OP_COMPRESS || crp->crp_op == CRYPTO_OP_DECOMPRESS, ("invalid compression op %x", crp->crp_op)); break; case CSP_MODE_CIPHER: KASSERT(crp->crp_op == CRYPTO_OP_ENCRYPT || crp->crp_op == CRYPTO_OP_DECRYPT, ("invalid cipher op %x", crp->crp_op)); break; case CSP_MODE_DIGEST: KASSERT(crp->crp_op == CRYPTO_OP_COMPUTE_DIGEST || crp->crp_op == CRYPTO_OP_VERIFY_DIGEST, ("invalid digest op %x", crp->crp_op)); break; case CSP_MODE_AEAD: KASSERT(crp->crp_op == (CRYPTO_OP_ENCRYPT | CRYPTO_OP_COMPUTE_DIGEST) || crp->crp_op == (CRYPTO_OP_DECRYPT | CRYPTO_OP_VERIFY_DIGEST), ("invalid AEAD op %x", crp->crp_op)); if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16) KASSERT(crp->crp_flags & CRYPTO_F_IV_SEPARATE, ("GCM without a separate IV")); if (csp->csp_cipher_alg == CRYPTO_AES_CCM_16) KASSERT(crp->crp_flags & CRYPTO_F_IV_SEPARATE, ("CCM without a separate IV")); break; case CSP_MODE_ETA: KASSERT(crp->crp_op == (CRYPTO_OP_ENCRYPT | CRYPTO_OP_COMPUTE_DIGEST) || crp->crp_op == (CRYPTO_OP_DECRYPT | CRYPTO_OP_VERIFY_DIGEST), ("invalid ETA op %x", crp->crp_op)); break; } if (csp->csp_mode == CSP_MODE_AEAD || csp->csp_mode == CSP_MODE_ETA) { if (crp->crp_aad == NULL) { KASSERT(crp->crp_aad_start == 0 || crp->crp_aad_start < ilen, ("invalid AAD start")); KASSERT(crp->crp_aad_length != 0 || crp->crp_aad_start == 0, ("AAD with zero length and non-zero start")); KASSERT(crp->crp_aad_length == 0 || crp->crp_aad_start + crp->crp_aad_length <= ilen, ("AAD outside input length")); } else { KASSERT(csp->csp_flags & CSP_F_SEPARATE_AAD, ("session doesn't support separate AAD buffer")); KASSERT(crp->crp_aad_start == 0, ("separate AAD buffer with non-zero AAD start")); KASSERT(crp->crp_aad_length != 0, ("separate AAD buffer with zero length")); } } else { KASSERT(crp->crp_aad == NULL && crp->crp_aad_start == 0 && crp->crp_aad_length == 0, ("AAD region in request not supporting AAD")); } if (csp->csp_ivlen == 0) { KASSERT((crp->crp_flags & CRYPTO_F_IV_SEPARATE) == 0, ("IV_SEPARATE set when IV isn't used")); KASSERT(crp->crp_iv_start == 0, ("crp_iv_start set when IV isn't used")); } else if (crp->crp_flags & CRYPTO_F_IV_SEPARATE) { KASSERT(crp->crp_iv_start == 0, ("IV_SEPARATE used with non-zero IV start")); } else { KASSERT(crp->crp_iv_start < ilen, ("invalid IV start")); KASSERT(crp->crp_iv_start + csp->csp_ivlen <= ilen, ("IV outside buffer length")); } /* XXX: payload_start of 0 should always be < ilen? */ KASSERT(crp->crp_payload_start == 0 || crp->crp_payload_start < ilen, ("invalid payload start")); KASSERT(crp->crp_payload_start + crp->crp_payload_length <= ilen, ("payload outside input buffer")); if (out == NULL) { KASSERT(crp->crp_payload_output_start == 0, ("payload output start non-zero without output buffer")); } else { KASSERT(crp->crp_payload_output_start < olen, ("invalid payload output start")); KASSERT(crp->crp_payload_output_start + crp->crp_payload_length <= olen, ("payload outside output buffer")); } if (csp->csp_mode == CSP_MODE_DIGEST || csp->csp_mode == CSP_MODE_AEAD || csp->csp_mode == CSP_MODE_ETA) { if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) len = ilen; else len = olen; KASSERT(crp->crp_digest_start == 0 || crp->crp_digest_start < len, ("invalid digest start")); /* XXX: For the mlen == 0 case this check isn't perfect. */ KASSERT(crp->crp_digest_start + csp->csp_auth_mlen <= len, ("digest outside buffer")); } else { KASSERT(crp->crp_digest_start == 0, ("non-zero digest start for request without a digest")); } if (csp->csp_cipher_klen != 0) KASSERT(csp->csp_cipher_key != NULL || crp->crp_cipher_key != NULL, ("cipher request without a key")); if (csp->csp_auth_klen != 0) KASSERT(csp->csp_auth_key != NULL || crp->crp_auth_key != NULL, ("auth request without a key")); KASSERT(crp->crp_callback != NULL, ("incoming crp without callback")); } #endif /* * Add a crypto request to a queue, to be processed by the kernel thread. */ int crypto_dispatch(struct cryptop *crp) { struct cryptocap *cap; int result; #ifdef INVARIANTS crp_sanity(crp); #endif cryptostats.cs_ops++; #ifdef CRYPTO_TIMING if (crypto_timing) binuptime(&crp->crp_tstamp); #endif crp->crp_retw_id = ((uintptr_t)crp->crp_session) % crypto_workers_num; if (CRYPTOP_ASYNC(crp)) { if (crp->crp_flags & CRYPTO_F_ASYNC_KEEPORDER) { struct crypto_ret_worker *ret_worker; ret_worker = CRYPTO_RETW(crp->crp_retw_id); CRYPTO_RETW_LOCK(ret_worker); crp->crp_seq = ret_worker->reorder_ops++; CRYPTO_RETW_UNLOCK(ret_worker); } TASK_INIT(&crp->crp_task, 0, crypto_task_invoke, crp); taskqueue_enqueue(crypto_tq, &crp->crp_task); return (0); } if ((crp->crp_flags & CRYPTO_F_BATCH) == 0) { /* * Caller marked the request to be processed * immediately; dispatch it directly to the * driver unless the driver is currently blocked. */ cap = crp->crp_session->cap; if (!cap->cc_qblocked) { result = crypto_invoke(cap, crp, 0); if (result != ERESTART) return (result); /* * The driver ran out of resources, put the request on * the queue. */ } } crypto_batch_enqueue(crp); return 0; } void crypto_batch_enqueue(struct cryptop *crp) { CRYPTO_Q_LOCK(); TAILQ_INSERT_TAIL(&crp_q, crp, crp_next); if (crp_sleep) wakeup_one(&crp_q); CRYPTO_Q_UNLOCK(); } /* * Add an asymetric crypto request to a queue, * to be processed by the kernel thread. */ int crypto_kdispatch(struct cryptkop *krp) { int error; cryptostats.cs_kops++; krp->krp_cap = NULL; error = crypto_kinvoke(krp); if (error == ERESTART) { CRYPTO_Q_LOCK(); TAILQ_INSERT_TAIL(&crp_kq, krp, krp_next); if (crp_sleep) wakeup_one(&crp_q); CRYPTO_Q_UNLOCK(); error = 0; } return error; } /* * Verify a driver is suitable for the specified operation. */ static __inline int kdriver_suitable(const struct cryptocap *cap, const struct cryptkop *krp) { return (cap->cc_kalg[krp->krp_op] & CRYPTO_ALG_FLAG_SUPPORTED) != 0; } /* * Select a driver for an asym operation. The driver must * support the necessary algorithm. The caller can constrain * which device is selected with the flags parameter. The * algorithm we use here is pretty stupid; just use the first * driver that supports the algorithms we need. If there are * multiple suitable drivers we choose the driver with the * fewest active operations. We prefer hardware-backed * drivers to software ones when either may be used. */ static struct cryptocap * crypto_select_kdriver(const struct cryptkop *krp, int flags) { struct cryptocap *cap, *best; int match, hid; CRYPTO_DRIVER_ASSERT(); /* * Look first for hardware crypto devices if permitted. */ if (flags & CRYPTOCAP_F_HARDWARE) match = CRYPTOCAP_F_HARDWARE; else match = CRYPTOCAP_F_SOFTWARE; best = NULL; again: for (hid = 0; hid < crypto_drivers_size; hid++) { /* * If there is no driver for this slot, or the driver * is not appropriate (hardware or software based on * match), then skip. */ cap = crypto_drivers[hid]; if (cap->cc_dev == NULL || (cap->cc_flags & match) == 0) continue; /* verify all the algorithms are supported. */ if (kdriver_suitable(cap, krp)) { if (best == NULL || cap->cc_koperations < best->cc_koperations) best = cap; } } if (best != NULL) return best; if (match == CRYPTOCAP_F_HARDWARE && (flags & CRYPTOCAP_F_SOFTWARE)) { /* sort of an Algol 68-style for loop */ match = CRYPTOCAP_F_SOFTWARE; goto again; } return best; } /* * Choose a driver for an asymmetric crypto request. */ static struct cryptocap * crypto_lookup_kdriver(struct cryptkop *krp) { struct cryptocap *cap; uint32_t crid; /* If this request is requeued, it might already have a driver. */ cap = krp->krp_cap; if (cap != NULL) return (cap); /* Use krp_crid to choose a driver. */ crid = krp->krp_crid; if ((crid & (CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE)) == 0) { cap = crypto_checkdriver(crid); if (cap != NULL) { /* * Driver present, it must support the * necessary algorithm and, if s/w drivers are * excluded, it must be registered as * hardware-backed. */ if (!kdriver_suitable(cap, krp) || (!crypto_devallowsoft && (cap->cc_flags & CRYPTOCAP_F_HARDWARE) == 0)) cap = NULL; } } else { /* * No requested driver; select based on crid flags. */ if (!crypto_devallowsoft) /* NB: disallow s/w drivers */ crid &= ~CRYPTOCAP_F_SOFTWARE; cap = crypto_select_kdriver(krp, crid); } if (cap != NULL) { krp->krp_cap = cap_ref(cap); krp->krp_hid = cap->cc_hid; } return (cap); } /* * Dispatch an asymmetric crypto request. */ static int crypto_kinvoke(struct cryptkop *krp) { struct cryptocap *cap = NULL; int error; KASSERT(krp != NULL, ("%s: krp == NULL", __func__)); KASSERT(krp->krp_callback != NULL, ("%s: krp->crp_callback == NULL", __func__)); CRYPTO_DRIVER_LOCK(); cap = crypto_lookup_kdriver(krp); if (cap == NULL) { CRYPTO_DRIVER_UNLOCK(); krp->krp_status = ENODEV; crypto_kdone(krp); return (0); } /* * If the device is blocked, return ERESTART to requeue it. */ if (cap->cc_kqblocked) { /* * XXX: Previously this set krp_status to ERESTART and * invoked crypto_kdone but the caller would still * requeue it. */ CRYPTO_DRIVER_UNLOCK(); return (ERESTART); } cap->cc_koperations++; CRYPTO_DRIVER_UNLOCK(); error = CRYPTODEV_KPROCESS(cap->cc_dev, krp, 0); if (error == ERESTART) { CRYPTO_DRIVER_LOCK(); cap->cc_koperations--; CRYPTO_DRIVER_UNLOCK(); return (error); } KASSERT(error == 0, ("error %d returned from crypto_kprocess", error)); return (0); } #ifdef CRYPTO_TIMING static void crypto_tstat(struct cryptotstat *ts, struct bintime *bt) { struct bintime now, delta; struct timespec t; uint64_t u; binuptime(&now); u = now.frac; delta.frac = now.frac - bt->frac; delta.sec = now.sec - bt->sec; if (u < delta.frac) delta.sec--; bintime2timespec(&delta, &t); timespecadd(&ts->acc, &t, &ts->acc); if (timespeccmp(&t, &ts->min, <)) ts->min = t; if (timespeccmp(&t, &ts->max, >)) ts->max = t; ts->count++; *bt = now; } #endif static void crypto_task_invoke(void *ctx, int pending) { struct cryptocap *cap; struct cryptop *crp; int result; crp = (struct cryptop *)ctx; cap = crp->crp_session->cap; result = crypto_invoke(cap, crp, 0); if (result == ERESTART) crypto_batch_enqueue(crp); } /* * Dispatch a crypto request to the appropriate crypto devices. */ static int crypto_invoke(struct cryptocap *cap, struct cryptop *crp, int hint) { KASSERT(crp != NULL, ("%s: crp == NULL", __func__)); KASSERT(crp->crp_callback != NULL, ("%s: crp->crp_callback == NULL", __func__)); KASSERT(crp->crp_session != NULL, ("%s: crp->crp_session == NULL", __func__)); #ifdef CRYPTO_TIMING if (crypto_timing) crypto_tstat(&cryptostats.cs_invoke, &crp->crp_tstamp); #endif if (cap->cc_flags & CRYPTOCAP_F_CLEANUP) { struct crypto_session_params csp; crypto_session_t nses; /* * Driver has unregistered; migrate the session and return * an error to the caller so they'll resubmit the op. * * XXX: What if there are more already queued requests for this * session? * * XXX: Real solution is to make sessions refcounted * and force callers to hold a reference when * assigning to crp_session. Could maybe change * crypto_getreq to accept a session pointer to make * that work. Alternatively, we could abandon the * notion of rewriting crp_session in requests forcing * the caller to deal with allocating a new session. * Perhaps provide a method to allow a crp's session to * be swapped that callers could use. */ csp = crp->crp_session->csp; crypto_freesession(crp->crp_session); /* * XXX: Key pointers may no longer be valid. If we * really want to support this we need to define the * KPI such that 'csp' is required to be valid for the * duration of a session by the caller perhaps. * * XXX: If the keys have been changed this will reuse * the old keys. This probably suggests making * rekeying more explicit and updating the key * pointers in 'csp' when the keys change. */ if (crypto_newsession(&nses, &csp, CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE) == 0) crp->crp_session = nses; crp->crp_etype = EAGAIN; crypto_done(crp); return 0; } else { /* * Invoke the driver to process the request. */ return CRYPTODEV_PROCESS(cap->cc_dev, crp, hint); } } void crypto_freereq(struct cryptop *crp) { if (crp == NULL) return; #ifdef DIAGNOSTIC { struct cryptop *crp2; struct crypto_ret_worker *ret_worker; CRYPTO_Q_LOCK(); TAILQ_FOREACH(crp2, &crp_q, crp_next) { KASSERT(crp2 != crp, ("Freeing cryptop from the crypto queue (%p).", crp)); } CRYPTO_Q_UNLOCK(); FOREACH_CRYPTO_RETW(ret_worker) { CRYPTO_RETW_LOCK(ret_worker); TAILQ_FOREACH(crp2, &ret_worker->crp_ret_q, crp_next) { KASSERT(crp2 != crp, ("Freeing cryptop from the return queue (%p).", crp)); } CRYPTO_RETW_UNLOCK(ret_worker); } } #endif uma_zfree(cryptop_zone, crp); } struct cryptop * crypto_getreq(crypto_session_t cses, int how) { struct cryptop *crp; MPASS(how == M_WAITOK || how == M_NOWAIT); crp = uma_zalloc(cryptop_zone, how | M_ZERO); crp->crp_session = cses; return (crp); } /* * Invoke the callback on behalf of the driver. */ void crypto_done(struct cryptop *crp) { KASSERT((crp->crp_flags & CRYPTO_F_DONE) == 0, ("crypto_done: op already done, flags 0x%x", crp->crp_flags)); crp->crp_flags |= CRYPTO_F_DONE; if (crp->crp_etype != 0) cryptostats.cs_errs++; #ifdef CRYPTO_TIMING if (crypto_timing) crypto_tstat(&cryptostats.cs_done, &crp->crp_tstamp); #endif /* * CBIMM means unconditionally do the callback immediately; * CBIFSYNC means do the callback immediately only if the * operation was done synchronously. Both are used to avoid * doing extraneous context switches; the latter is mostly * used with the software crypto driver. */ if (!CRYPTOP_ASYNC_KEEPORDER(crp) && ((crp->crp_flags & CRYPTO_F_CBIMM) || ((crp->crp_flags & CRYPTO_F_CBIFSYNC) && (crypto_ses2caps(crp->crp_session) & CRYPTOCAP_F_SYNC)))) { /* * Do the callback directly. This is ok when the * callback routine does very little (e.g. the * /dev/crypto callback method just does a wakeup). */ #ifdef CRYPTO_TIMING if (crypto_timing) { /* * NB: We must copy the timestamp before * doing the callback as the cryptop is * likely to be reclaimed. */ struct bintime t = crp->crp_tstamp; crypto_tstat(&cryptostats.cs_cb, &t); crp->crp_callback(crp); crypto_tstat(&cryptostats.cs_finis, &t); } else #endif crp->crp_callback(crp); } else { struct crypto_ret_worker *ret_worker; bool wake; ret_worker = CRYPTO_RETW(crp->crp_retw_id); wake = false; /* * Normal case; queue the callback for the thread. */ CRYPTO_RETW_LOCK(ret_worker); if (CRYPTOP_ASYNC_KEEPORDER(crp)) { struct cryptop *tmp; TAILQ_FOREACH_REVERSE(tmp, &ret_worker->crp_ordered_ret_q, cryptop_q, crp_next) { if (CRYPTO_SEQ_GT(crp->crp_seq, tmp->crp_seq)) { TAILQ_INSERT_AFTER(&ret_worker->crp_ordered_ret_q, tmp, crp, crp_next); break; } } if (tmp == NULL) { TAILQ_INSERT_HEAD(&ret_worker->crp_ordered_ret_q, crp, crp_next); } if (crp->crp_seq == ret_worker->reorder_cur_seq) wake = true; } else { if (CRYPTO_RETW_EMPTY(ret_worker)) wake = true; TAILQ_INSERT_TAIL(&ret_worker->crp_ret_q, crp, crp_next); } if (wake) wakeup_one(&ret_worker->crp_ret_q); /* shared wait channel */ CRYPTO_RETW_UNLOCK(ret_worker); } } /* * Invoke the callback on behalf of the driver. */ void crypto_kdone(struct cryptkop *krp) { struct crypto_ret_worker *ret_worker; struct cryptocap *cap; if (krp->krp_status != 0) cryptostats.cs_kerrs++; CRYPTO_DRIVER_LOCK(); cap = krp->krp_cap; KASSERT(cap->cc_koperations > 0, ("cc_koperations == 0")); cap->cc_koperations--; if (cap->cc_koperations == 0 && cap->cc_flags & CRYPTOCAP_F_CLEANUP) wakeup(cap); CRYPTO_DRIVER_UNLOCK(); krp->krp_cap = NULL; cap_rele(cap); ret_worker = CRYPTO_RETW(0); CRYPTO_RETW_LOCK(ret_worker); if (CRYPTO_RETW_EMPTY(ret_worker)) wakeup_one(&ret_worker->crp_ret_q); /* shared wait channel */ TAILQ_INSERT_TAIL(&ret_worker->crp_ret_kq, krp, krp_next); CRYPTO_RETW_UNLOCK(ret_worker); } int crypto_getfeat(int *featp) { int hid, kalg, feat = 0; CRYPTO_DRIVER_LOCK(); for (hid = 0; hid < crypto_drivers_size; hid++) { const struct cryptocap *cap = crypto_drivers[hid]; if (cap == NULL || ((cap->cc_flags & CRYPTOCAP_F_SOFTWARE) && !crypto_devallowsoft)) { continue; } for (kalg = 0; kalg < CRK_ALGORITHM_MAX; kalg++) if (cap->cc_kalg[kalg] & CRYPTO_ALG_FLAG_SUPPORTED) feat |= 1 << kalg; } CRYPTO_DRIVER_UNLOCK(); *featp = feat; return (0); } /* * Terminate a thread at module unload. The process that * initiated this is waiting for us to signal that we're gone; * wake it up and exit. We use the driver table lock to insure * we don't do the wakeup before they're waiting. There is no * race here because the waiter sleeps on the proc lock for the * thread so it gets notified at the right time because of an * extra wakeup that's done in exit1(). */ static void crypto_finis(void *chan) { CRYPTO_DRIVER_LOCK(); wakeup_one(chan); CRYPTO_DRIVER_UNLOCK(); kproc_exit(0); } /* * Crypto thread, dispatches crypto requests. */ static void crypto_proc(void) { struct cryptop *crp, *submit; struct cryptkop *krp; struct cryptocap *cap; int result, hint; #if defined(__i386__) || defined(__amd64__) || defined(__aarch64__) fpu_kern_thread(FPU_KERN_NORMAL); #endif CRYPTO_Q_LOCK(); for (;;) { /* * Find the first element in the queue that can be * processed and look-ahead to see if multiple ops * are ready for the same driver. */ submit = NULL; hint = 0; TAILQ_FOREACH(crp, &crp_q, crp_next) { cap = crp->crp_session->cap; /* * Driver cannot disappeared when there is an active * session. */ KASSERT(cap != NULL, ("%s:%u Driver disappeared.", __func__, __LINE__)); if (cap->cc_flags & CRYPTOCAP_F_CLEANUP) { /* Op needs to be migrated, process it. */ if (submit == NULL) submit = crp; break; } if (!cap->cc_qblocked) { if (submit != NULL) { /* * We stop on finding another op, * regardless whether its for the same * driver or not. We could keep * searching the queue but it might be * better to just use a per-driver * queue instead. */ if (submit->crp_session->cap == cap) hint = CRYPTO_HINT_MORE; break; } else { submit = crp; if ((submit->crp_flags & CRYPTO_F_BATCH) == 0) break; /* keep scanning for more are q'd */ } } } if (submit != NULL) { TAILQ_REMOVE(&crp_q, submit, crp_next); cap = submit->crp_session->cap; KASSERT(cap != NULL, ("%s:%u Driver disappeared.", __func__, __LINE__)); CRYPTO_Q_UNLOCK(); result = crypto_invoke(cap, submit, hint); CRYPTO_Q_LOCK(); if (result == ERESTART) { /* * The driver ran out of resources, mark the * driver ``blocked'' for cryptop's and put * the request back in the queue. It would * best to put the request back where we got * it but that's hard so for now we put it * at the front. This should be ok; putting * it at the end does not work. */ cap->cc_qblocked = 1; TAILQ_INSERT_HEAD(&crp_q, submit, crp_next); cryptostats.cs_blocks++; } } /* As above, but for key ops */ TAILQ_FOREACH(krp, &crp_kq, krp_next) { cap = krp->krp_cap; if (cap->cc_flags & CRYPTOCAP_F_CLEANUP) { /* * Operation needs to be migrated, * clear krp_cap so a new driver is * selected. */ krp->krp_cap = NULL; cap_rele(cap); break; } if (!cap->cc_kqblocked) break; } if (krp != NULL) { TAILQ_REMOVE(&crp_kq, krp, krp_next); CRYPTO_Q_UNLOCK(); result = crypto_kinvoke(krp); CRYPTO_Q_LOCK(); if (result == ERESTART) { /* * The driver ran out of resources, mark the * driver ``blocked'' for cryptkop's and put * the request back in the queue. It would * best to put the request back where we got * it but that's hard so for now we put it * at the front. This should be ok; putting * it at the end does not work. */ krp->krp_cap->cc_kqblocked = 1; TAILQ_INSERT_HEAD(&crp_kq, krp, krp_next); cryptostats.cs_kblocks++; } } if (submit == NULL && krp == NULL) { /* * Nothing more to be processed. Sleep until we're * woken because there are more ops to process. * This happens either by submission or by a driver * becoming unblocked and notifying us through * crypto_unblock. Note that when we wakeup we * start processing each queue again from the * front. It's not clear that it's important to * preserve this ordering since ops may finish * out of order if dispatched to different devices * and some become blocked while others do not. */ crp_sleep = 1; msleep(&crp_q, &crypto_q_mtx, PWAIT, "crypto_wait", 0); crp_sleep = 0; if (cryptoproc == NULL) break; cryptostats.cs_intrs++; } } CRYPTO_Q_UNLOCK(); crypto_finis(&crp_q); } /* * Crypto returns thread, does callbacks for processed crypto requests. * Callbacks are done here, rather than in the crypto drivers, because * callbacks typically are expensive and would slow interrupt handling. */ static void crypto_ret_proc(struct crypto_ret_worker *ret_worker) { struct cryptop *crpt; struct cryptkop *krpt; CRYPTO_RETW_LOCK(ret_worker); for (;;) { /* Harvest return q's for completed ops */ crpt = TAILQ_FIRST(&ret_worker->crp_ordered_ret_q); if (crpt != NULL) { if (crpt->crp_seq == ret_worker->reorder_cur_seq) { TAILQ_REMOVE(&ret_worker->crp_ordered_ret_q, crpt, crp_next); ret_worker->reorder_cur_seq++; } else { crpt = NULL; } } if (crpt == NULL) { crpt = TAILQ_FIRST(&ret_worker->crp_ret_q); if (crpt != NULL) TAILQ_REMOVE(&ret_worker->crp_ret_q, crpt, crp_next); } krpt = TAILQ_FIRST(&ret_worker->crp_ret_kq); if (krpt != NULL) TAILQ_REMOVE(&ret_worker->crp_ret_kq, krpt, krp_next); if (crpt != NULL || krpt != NULL) { CRYPTO_RETW_UNLOCK(ret_worker); /* * Run callbacks unlocked. */ if (crpt != NULL) { #ifdef CRYPTO_TIMING if (crypto_timing) { /* * NB: We must copy the timestamp before * doing the callback as the cryptop is * likely to be reclaimed. */ struct bintime t = crpt->crp_tstamp; crypto_tstat(&cryptostats.cs_cb, &t); crpt->crp_callback(crpt); crypto_tstat(&cryptostats.cs_finis, &t); } else #endif crpt->crp_callback(crpt); } if (krpt != NULL) krpt->krp_callback(krpt); CRYPTO_RETW_LOCK(ret_worker); } else { /* * Nothing more to be processed. Sleep until we're * woken because there are more returns to process. */ msleep(&ret_worker->crp_ret_q, &ret_worker->crypto_ret_mtx, PWAIT, "crypto_ret_wait", 0); if (ret_worker->cryptoretproc == NULL) break; cryptostats.cs_rets++; } } CRYPTO_RETW_UNLOCK(ret_worker); crypto_finis(&ret_worker->crp_ret_q); } #ifdef DDB static void db_show_drivers(void) { int hid; db_printf("%12s %4s %4s %8s %2s %2s\n" , "Device" , "Ses" , "Kops" , "Flags" , "QB" , "KB" ); for (hid = 0; hid < crypto_drivers_size; hid++) { const struct cryptocap *cap = crypto_drivers[hid]; if (cap == NULL) continue; db_printf("%-12s %4u %4u %08x %2u %2u\n" , device_get_nameunit(cap->cc_dev) , cap->cc_sessions , cap->cc_koperations , cap->cc_flags , cap->cc_qblocked , cap->cc_kqblocked ); } } DB_SHOW_COMMAND(crypto, db_show_crypto) { struct cryptop *crp; struct crypto_ret_worker *ret_worker; db_show_drivers(); db_printf("\n"); db_printf("%4s %8s %4s %4s %4s %4s %8s %8s\n", "HID", "Caps", "Ilen", "Olen", "Etype", "Flags", "Device", "Callback"); TAILQ_FOREACH(crp, &crp_q, crp_next) { db_printf("%4u %08x %4u %4u %04x %8p %8p\n" , crp->crp_session->cap->cc_hid , (int) crypto_ses2caps(crp->crp_session) , crp->crp_olen , crp->crp_etype , crp->crp_flags , device_get_nameunit(crp->crp_session->cap->cc_dev) , crp->crp_callback ); } FOREACH_CRYPTO_RETW(ret_worker) { db_printf("\n%8s %4s %4s %4s %8s\n", "ret_worker", "HID", "Etype", "Flags", "Callback"); if (!TAILQ_EMPTY(&ret_worker->crp_ret_q)) { TAILQ_FOREACH(crp, &ret_worker->crp_ret_q, crp_next) { db_printf("%8td %4u %4u %04x %8p\n" , CRYPTO_RETW_ID(ret_worker) , crp->crp_session->cap->cc_hid , crp->crp_etype , crp->crp_flags , crp->crp_callback ); } } } } DB_SHOW_COMMAND(kcrypto, db_show_kcrypto) { struct cryptkop *krp; struct crypto_ret_worker *ret_worker; db_show_drivers(); db_printf("\n"); db_printf("%4s %5s %4s %4s %8s %4s %8s\n", "Op", "Status", "#IP", "#OP", "CRID", "HID", "Callback"); TAILQ_FOREACH(krp, &crp_kq, krp_next) { db_printf("%4u %5u %4u %4u %08x %4u %8p\n" , krp->krp_op , krp->krp_status , krp->krp_iparams, krp->krp_oparams , krp->krp_crid, krp->krp_hid , krp->krp_callback ); } ret_worker = CRYPTO_RETW(0); if (!TAILQ_EMPTY(&ret_worker->crp_ret_q)) { db_printf("%4s %5s %8s %4s %8s\n", "Op", "Status", "CRID", "HID", "Callback"); TAILQ_FOREACH(krp, &ret_worker->crp_ret_kq, krp_next) { db_printf("%4u %5u %08x %4u %8p\n" , krp->krp_op , krp->krp_status , krp->krp_crid, krp->krp_hid , krp->krp_callback ); } } } #endif int crypto_modevent(module_t mod, int type, void *unused); /* * Initialization code, both for static and dynamic loading. * Note this is not invoked with the usual MODULE_DECLARE * mechanism but instead is listed as a dependency by the * cryptosoft driver. This guarantees proper ordering of * calls on module load/unload. */ int crypto_modevent(module_t mod, int type, void *unused) { int error = EINVAL; switch (type) { case MOD_LOAD: error = crypto_init(); if (error == 0 && bootverbose) printf("crypto: \n"); break; case MOD_UNLOAD: /*XXX disallow if active sessions */ error = 0; crypto_destroy(); return 0; } return error; } MODULE_VERSION(crypto, 1); MODULE_DEPEND(crypto, zlib, 1, 1, 1); Index: head/sys/opencrypto/cryptosoft.c =================================================================== --- head/sys/opencrypto/cryptosoft.c (revision 362623) +++ head/sys/opencrypto/cryptosoft.c (revision 362624) @@ -1,1541 +1,1528 @@ /* $OpenBSD: cryptosoft.c,v 1.35 2002/04/26 08:43:50 deraadt Exp $ */ /*- * The author of this code is Angelos D. Keromytis (angelos@cis.upenn.edu) * Copyright (c) 2002-2006 Sam Leffler, Errno Consulting * * This code was written by Angelos D. Keromytis in Athens, Greece, in * February 2000. Network Security Technologies Inc. (NSTI) kindly * supported the development of this code. * * Copyright (c) 2000, 2001 Angelos D. Keromytis * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by John-Mark Gurney * under sponsorship of the FreeBSD Foundation and * Rubicon Communications, LLC (Netgate). * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in * all source code copies of any software which is or includes a copy or * modification of this software. * * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR * PURPOSE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cryptodev_if.h" struct swcr_auth { void *sw_ictx; void *sw_octx; struct auth_hash *sw_axf; uint16_t sw_mlen; }; struct swcr_encdec { void *sw_kschedule; struct enc_xform *sw_exf; }; struct swcr_compdec { struct comp_algo *sw_cxf; }; struct swcr_session { struct mtx swcr_lock; int (*swcr_process)(struct swcr_session *, struct cryptop *); struct swcr_auth swcr_auth; struct swcr_encdec swcr_encdec; struct swcr_compdec swcr_compdec; }; static int32_t swcr_id; static void swcr_freesession(device_t dev, crypto_session_t cses); /* Used for CRYPTO_NULL_CBC. */ static int swcr_null(struct swcr_session *ses, struct cryptop *crp) { return (0); } /* * Apply a symmetric encryption/decryption algorithm. */ static int swcr_encdec(struct swcr_session *ses, struct cryptop *crp) { unsigned char iv[EALG_MAX_BLOCK_LEN], blk[EALG_MAX_BLOCK_LEN]; unsigned char *ivp, *nivp, iv2[EALG_MAX_BLOCK_LEN]; const struct crypto_session_params *csp; struct swcr_encdec *sw; struct enc_xform *exf; int i, blks, inlen, ivlen, outlen, resid; struct crypto_buffer_cursor cc_in, cc_out; const unsigned char *inblk; unsigned char *outblk; int error; bool encrypting; error = 0; sw = &ses->swcr_encdec; exf = sw->sw_exf; ivlen = exf->ivsize; if (exf->native_blocksize == 0) { /* Check for non-padded data */ if ((crp->crp_payload_length % exf->blocksize) != 0) return (EINVAL); blks = exf->blocksize; } else blks = exf->native_blocksize; if (exf == &enc_xform_aes_icm && (crp->crp_flags & CRYPTO_F_IV_SEPARATE) == 0) return (EINVAL); if (crp->crp_cipher_key != NULL) { csp = crypto_get_params(crp->crp_session); error = exf->setkey(sw->sw_kschedule, crp->crp_cipher_key, csp->csp_cipher_klen); if (error) return (error); } crypto_read_iv(crp, iv); if (exf->reinit) { /* * xforms that provide a reinit method perform all IV * handling themselves. */ exf->reinit(sw->sw_kschedule, iv); } ivp = iv; crypto_cursor_init(&cc_in, &crp->crp_buf); crypto_cursor_advance(&cc_in, crp->crp_payload_start); inlen = crypto_cursor_seglen(&cc_in); inblk = crypto_cursor_segbase(&cc_in); if (CRYPTO_HAS_OUTPUT_BUFFER(crp)) { crypto_cursor_init(&cc_out, &crp->crp_obuf); crypto_cursor_advance(&cc_out, crp->crp_payload_output_start); } else cc_out = cc_in; outlen = crypto_cursor_seglen(&cc_out); outblk = crypto_cursor_segbase(&cc_out); resid = crp->crp_payload_length; encrypting = CRYPTO_OP_IS_ENCRYPT(crp->crp_op); /* * Loop through encrypting blocks. 'inlen' is the remaining * length of the current segment in the input buffer. * 'outlen' is the remaining length of current segment in the * output buffer. */ while (resid >= blks) { /* * If the current block is not contained within the * current input/output segment, use 'blk' as a local * buffer. */ if (inlen < blks) { crypto_cursor_copydata(&cc_in, blks, blk); inblk = blk; } if (outlen < blks) outblk = blk; /* * Ciphers without a 'reinit' hook are assumed to be * used in CBC mode where the chaining is done here. */ if (exf->reinit != NULL) { if (encrypting) exf->encrypt(sw->sw_kschedule, inblk, outblk); else exf->decrypt(sw->sw_kschedule, inblk, outblk); } else if (encrypting) { /* XOR with previous block */ for (i = 0; i < blks; i++) outblk[i] = inblk[i] ^ ivp[i]; exf->encrypt(sw->sw_kschedule, outblk, outblk); /* * Keep encrypted block for XOR'ing * with next block */ memcpy(iv, outblk, blks); ivp = iv; } else { /* decrypt */ /* * Keep encrypted block for XOR'ing * with next block */ nivp = (ivp == iv) ? iv2 : iv; memcpy(nivp, inblk, blks); exf->decrypt(sw->sw_kschedule, inblk, outblk); /* XOR with previous block */ for (i = 0; i < blks; i++) outblk[i] ^= ivp[i]; ivp = nivp; } if (inlen < blks) { inlen = crypto_cursor_seglen(&cc_in); inblk = crypto_cursor_segbase(&cc_in); } else { crypto_cursor_advance(&cc_in, blks); inlen -= blks; inblk += blks; } if (outlen < blks) { crypto_cursor_copyback(&cc_out, blks, blk); outlen = crypto_cursor_seglen(&cc_out); outblk = crypto_cursor_segbase(&cc_out); } else { crypto_cursor_advance(&cc_out, blks); outlen -= blks; outblk += blks; } resid -= blks; } /* Handle trailing partial block for stream ciphers. */ if (resid > 0) { KASSERT(exf->native_blocksize != 0, ("%s: partial block of %d bytes for cipher %s", __func__, i, exf->name)); KASSERT(exf->reinit != NULL, ("%s: partial block cipher %s without reinit hook", __func__, exf->name)); KASSERT(resid < blks, ("%s: partial block too big", __func__)); inlen = crypto_cursor_seglen(&cc_in); outlen = crypto_cursor_seglen(&cc_out); if (inlen < resid) { crypto_cursor_copydata(&cc_in, resid, blk); inblk = blk; } else inblk = crypto_cursor_segbase(&cc_in); if (outlen < resid) outblk = blk; else outblk = crypto_cursor_segbase(&cc_out); if (encrypting) exf->encrypt_last(sw->sw_kschedule, inblk, outblk, resid); else exf->decrypt_last(sw->sw_kschedule, inblk, outblk, resid); if (outlen < resid) crypto_cursor_copyback(&cc_out, resid, blk); } explicit_bzero(blk, sizeof(blk)); explicit_bzero(iv, sizeof(iv)); explicit_bzero(iv2, sizeof(iv2)); return (0); } static void swcr_authprepare(struct auth_hash *axf, struct swcr_auth *sw, const uint8_t *key, int klen) { switch (axf->type) { case CRYPTO_SHA1_HMAC: case CRYPTO_SHA2_224_HMAC: case CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384_HMAC: case CRYPTO_SHA2_512_HMAC: case CRYPTO_NULL_HMAC: case CRYPTO_RIPEMD160_HMAC: hmac_init_ipad(axf, key, klen, sw->sw_ictx); hmac_init_opad(axf, key, klen, sw->sw_octx); break; case CRYPTO_POLY1305: case CRYPTO_BLAKE2B: case CRYPTO_BLAKE2S: axf->Setkey(sw->sw_ictx, key, klen); axf->Init(sw->sw_ictx); break; default: panic("%s: algorithm %d doesn't use keys", __func__, axf->type); } } /* * Compute or verify hash. */ static int swcr_authcompute(struct swcr_session *ses, struct cryptop *crp) { u_char aalg[HASH_MAX_LEN]; const struct crypto_session_params *csp; struct swcr_auth *sw; struct auth_hash *axf; union authctx ctx; int err; sw = &ses->swcr_auth; axf = sw->sw_axf; if (crp->crp_auth_key != NULL) { csp = crypto_get_params(crp->crp_session); swcr_authprepare(axf, sw, crp->crp_auth_key, csp->csp_auth_klen); } bcopy(sw->sw_ictx, &ctx, axf->ctxsize); if (crp->crp_aad != NULL) err = axf->Update(&ctx, crp->crp_aad, crp->crp_aad_length); else err = crypto_apply(crp, crp->crp_aad_start, crp->crp_aad_length, axf->Update, &ctx); if (err) return err; if (CRYPTO_HAS_OUTPUT_BUFFER(crp) && CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) err = crypto_apply_buf(&crp->crp_obuf, crp->crp_payload_output_start, crp->crp_payload_length, axf->Update, &ctx); else err = crypto_apply(crp, crp->crp_payload_start, crp->crp_payload_length, axf->Update, &ctx); if (err) return err; switch (axf->type) { case CRYPTO_SHA1: case CRYPTO_SHA2_224: case CRYPTO_SHA2_256: case CRYPTO_SHA2_384: case CRYPTO_SHA2_512: axf->Final(aalg, &ctx); break; case CRYPTO_SHA1_HMAC: case CRYPTO_SHA2_224_HMAC: case CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384_HMAC: case CRYPTO_SHA2_512_HMAC: case CRYPTO_RIPEMD160_HMAC: if (sw->sw_octx == NULL) return EINVAL; axf->Final(aalg, &ctx); bcopy(sw->sw_octx, &ctx, axf->ctxsize); axf->Update(&ctx, aalg, axf->hashsize); axf->Final(aalg, &ctx); break; case CRYPTO_BLAKE2B: case CRYPTO_BLAKE2S: case CRYPTO_NULL_HMAC: case CRYPTO_POLY1305: axf->Final(aalg, &ctx); break; } if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) { u_char uaalg[HASH_MAX_LEN]; crypto_copydata(crp, crp->crp_digest_start, sw->sw_mlen, uaalg); if (timingsafe_bcmp(aalg, uaalg, sw->sw_mlen) != 0) err = EBADMSG; explicit_bzero(uaalg, sizeof(uaalg)); } else { /* Inject the authentication data */ crypto_copyback(crp, crp->crp_digest_start, sw->sw_mlen, aalg); } explicit_bzero(aalg, sizeof(aalg)); return (err); } CTASSERT(INT_MAX <= (1ll<<39) - 256); /* GCM: plain text < 2^39-256 */ CTASSERT(INT_MAX <= (uint64_t)-1); /* GCM: associated data <= 2^64-1 */ static int swcr_gmac(struct swcr_session *ses, struct cryptop *crp) { uint32_t blkbuf[howmany(AES_BLOCK_LEN, sizeof(uint32_t))]; u_char *blk = (u_char *)blkbuf; u_char tag[GMAC_DIGEST_LEN]; u_char iv[AES_BLOCK_LEN]; struct crypto_buffer_cursor cc; const u_char *inblk; union authctx ctx; struct swcr_auth *swa; struct auth_hash *axf; uint32_t *blkp; int blksz, error, ivlen, len, resid; swa = &ses->swcr_auth; axf = swa->sw_axf; bcopy(swa->sw_ictx, &ctx, axf->ctxsize); blksz = GMAC_BLOCK_LEN; KASSERT(axf->blocksize == blksz, ("%s: axf block size mismatch", __func__)); /* Initialize the IV */ ivlen = AES_GCM_IV_LEN; crypto_read_iv(crp, iv); axf->Reinit(&ctx, iv, ivlen); crypto_cursor_init(&cc, &crp->crp_buf); crypto_cursor_advance(&cc, crp->crp_payload_start); for (resid = crp->crp_payload_length; resid >= blksz; resid -= len) { len = crypto_cursor_seglen(&cc); if (len >= blksz) { inblk = crypto_cursor_segbase(&cc); len = rounddown(MIN(len, resid), blksz); crypto_cursor_advance(&cc, len); } else { len = blksz; crypto_cursor_copydata(&cc, len, blk); inblk = blk; } axf->Update(&ctx, inblk, len); } if (resid > 0) { memset(blk, 0, blksz); crypto_cursor_copydata(&cc, resid, blk); axf->Update(&ctx, blk, blksz); } /* length block */ memset(blk, 0, blksz); blkp = (uint32_t *)blk + 1; *blkp = htobe32(crp->crp_payload_length * 8); axf->Update(&ctx, blk, blksz); /* Finalize MAC */ axf->Final(tag, &ctx); error = 0; if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) { u_char tag2[GMAC_DIGEST_LEN]; crypto_copydata(crp, crp->crp_digest_start, swa->sw_mlen, tag2); if (timingsafe_bcmp(tag, tag2, swa->sw_mlen) != 0) error = EBADMSG; explicit_bzero(tag2, sizeof(tag2)); } else { /* Inject the authentication data */ crypto_copyback(crp, crp->crp_digest_start, swa->sw_mlen, tag); } explicit_bzero(blkbuf, sizeof(blkbuf)); explicit_bzero(tag, sizeof(tag)); explicit_bzero(iv, sizeof(iv)); return (error); } static int swcr_gcm(struct swcr_session *ses, struct cryptop *crp) { uint32_t blkbuf[howmany(AES_BLOCK_LEN, sizeof(uint32_t))]; u_char *blk = (u_char *)blkbuf; u_char tag[GMAC_DIGEST_LEN]; u_char iv[AES_BLOCK_LEN]; struct crypto_buffer_cursor cc_in, cc_out; const u_char *inblk; u_char *outblk; union authctx ctx; struct swcr_auth *swa; struct swcr_encdec *swe; struct auth_hash *axf; struct enc_xform *exf; uint32_t *blkp; int blksz, error, ivlen, len, r, resid; swa = &ses->swcr_auth; axf = swa->sw_axf; bcopy(swa->sw_ictx, &ctx, axf->ctxsize); blksz = GMAC_BLOCK_LEN; KASSERT(axf->blocksize == blksz, ("%s: axf block size mismatch", __func__)); swe = &ses->swcr_encdec; exf = swe->sw_exf; KASSERT(axf->blocksize == exf->native_blocksize, ("%s: blocksize mismatch", __func__)); if ((crp->crp_flags & CRYPTO_F_IV_SEPARATE) == 0) return (EINVAL); /* Initialize the IV */ ivlen = AES_GCM_IV_LEN; bcopy(crp->crp_iv, iv, ivlen); /* Supply MAC with IV */ axf->Reinit(&ctx, iv, ivlen); /* Supply MAC with AAD */ if (crp->crp_aad != NULL) { len = rounddown(crp->crp_aad_length, blksz); if (len != 0) axf->Update(&ctx, crp->crp_aad, len); if (crp->crp_aad_length != len) { memset(blk, 0, blksz); memcpy(blk, (char *)crp->crp_aad + len, crp->crp_aad_length - len); axf->Update(&ctx, blk, blksz); } } else { crypto_cursor_init(&cc_in, &crp->crp_buf); crypto_cursor_advance(&cc_in, crp->crp_aad_start); for (resid = crp->crp_aad_length; resid >= blksz; resid -= len) { len = crypto_cursor_seglen(&cc_in); if (len >= blksz) { inblk = crypto_cursor_segbase(&cc_in); len = rounddown(MIN(len, resid), blksz); crypto_cursor_advance(&cc_in, len); } else { len = blksz; crypto_cursor_copydata(&cc_in, len, blk); inblk = blk; } axf->Update(&ctx, inblk, len); } if (resid > 0) { memset(blk, 0, blksz); crypto_cursor_copydata(&cc_in, resid, blk); axf->Update(&ctx, blk, blksz); } } exf->reinit(swe->sw_kschedule, iv); /* Do encryption with MAC */ crypto_cursor_init(&cc_in, &crp->crp_buf); crypto_cursor_advance(&cc_in, crp->crp_payload_start); if (CRYPTO_HAS_OUTPUT_BUFFER(crp)) { crypto_cursor_init(&cc_out, &crp->crp_obuf); crypto_cursor_advance(&cc_out, crp->crp_payload_output_start); } else cc_out = cc_in; for (resid = crp->crp_payload_length; resid >= blksz; resid -= blksz) { if (crypto_cursor_seglen(&cc_in) < blksz) { crypto_cursor_copydata(&cc_in, blksz, blk); inblk = blk; } else { inblk = crypto_cursor_segbase(&cc_in); crypto_cursor_advance(&cc_in, blksz); } if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) { if (crypto_cursor_seglen(&cc_out) < blksz) outblk = blk; else outblk = crypto_cursor_segbase(&cc_out); exf->encrypt(swe->sw_kschedule, inblk, outblk); axf->Update(&ctx, outblk, blksz); if (outblk == blk) crypto_cursor_copyback(&cc_out, blksz, blk); else crypto_cursor_advance(&cc_out, blksz); } else { axf->Update(&ctx, inblk, blksz); } } if (resid > 0) { crypto_cursor_copydata(&cc_in, resid, blk); if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) { exf->encrypt_last(swe->sw_kschedule, blk, blk, resid); crypto_cursor_copyback(&cc_out, resid, blk); } axf->Update(&ctx, blk, resid); } /* length block */ memset(blk, 0, blksz); blkp = (uint32_t *)blk + 1; *blkp = htobe32(crp->crp_aad_length * 8); blkp = (uint32_t *)blk + 3; *blkp = htobe32(crp->crp_payload_length * 8); axf->Update(&ctx, blk, blksz); /* Finalize MAC */ axf->Final(tag, &ctx); /* Validate tag */ error = 0; if (!CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) { u_char tag2[GMAC_DIGEST_LEN]; crypto_copydata(crp, crp->crp_digest_start, swa->sw_mlen, tag2); r = timingsafe_bcmp(tag, tag2, swa->sw_mlen); explicit_bzero(tag2, sizeof(tag2)); if (r != 0) { error = EBADMSG; goto out; } /* tag matches, decrypt data */ crypto_cursor_init(&cc_in, &crp->crp_buf); crypto_cursor_advance(&cc_in, crp->crp_payload_start); for (resid = crp->crp_payload_length; resid > blksz; resid -= blksz) { if (crypto_cursor_seglen(&cc_in) < blksz) { crypto_cursor_copydata(&cc_in, blksz, blk); inblk = blk; } else { inblk = crypto_cursor_segbase(&cc_in); crypto_cursor_advance(&cc_in, blksz); } if (crypto_cursor_seglen(&cc_out) < blksz) outblk = blk; else outblk = crypto_cursor_segbase(&cc_out); exf->decrypt(swe->sw_kschedule, inblk, outblk); if (outblk == blk) crypto_cursor_copyback(&cc_out, blksz, blk); else crypto_cursor_advance(&cc_out, blksz); } if (resid > 0) { crypto_cursor_copydata(&cc_in, resid, blk); exf->decrypt_last(swe->sw_kschedule, blk, blk, resid); crypto_cursor_copyback(&cc_out, resid, blk); } } else { /* Inject the authentication data */ crypto_copyback(crp, crp->crp_digest_start, swa->sw_mlen, tag); } out: explicit_bzero(blkbuf, sizeof(blkbuf)); explicit_bzero(tag, sizeof(tag)); explicit_bzero(iv, sizeof(iv)); return (error); } static int swcr_ccm_cbc_mac(struct swcr_session *ses, struct cryptop *crp) { u_char tag[AES_CBC_MAC_HASH_LEN]; u_char iv[AES_BLOCK_LEN]; union authctx ctx; struct swcr_auth *swa; struct auth_hash *axf; int error, ivlen; swa = &ses->swcr_auth; axf = swa->sw_axf; bcopy(swa->sw_ictx, &ctx, axf->ctxsize); /* Initialize the IV */ ivlen = AES_CCM_IV_LEN; crypto_read_iv(crp, iv); /* * AES CCM-CBC-MAC needs to know the length of both the auth * data and payload data before doing the auth computation. */ ctx.aes_cbc_mac_ctx.authDataLength = crp->crp_payload_length; ctx.aes_cbc_mac_ctx.cryptDataLength = 0; axf->Reinit(&ctx, iv, ivlen); if (crp->crp_aad != NULL) error = axf->Update(&ctx, crp->crp_aad, crp->crp_aad_length); else error = crypto_apply(crp, crp->crp_payload_start, crp->crp_payload_length, axf->Update, &ctx); if (error) return (error); /* Finalize MAC */ axf->Final(tag, &ctx); if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) { u_char tag2[AES_CBC_MAC_HASH_LEN]; crypto_copydata(crp, crp->crp_digest_start, swa->sw_mlen, tag2); if (timingsafe_bcmp(tag, tag2, swa->sw_mlen) != 0) error = EBADMSG; explicit_bzero(tag2, sizeof(tag)); } else { /* Inject the authentication data */ crypto_copyback(crp, crp->crp_digest_start, swa->sw_mlen, tag); } explicit_bzero(tag, sizeof(tag)); explicit_bzero(iv, sizeof(iv)); return (error); } static int swcr_ccm(struct swcr_session *ses, struct cryptop *crp) { uint32_t blkbuf[howmany(AES_BLOCK_LEN, sizeof(uint32_t))]; u_char *blk = (u_char *)blkbuf; u_char tag[AES_CBC_MAC_HASH_LEN]; u_char iv[AES_BLOCK_LEN]; struct crypto_buffer_cursor cc_in, cc_out; const u_char *inblk; u_char *outblk; union authctx ctx; struct swcr_auth *swa; struct swcr_encdec *swe; struct auth_hash *axf; struct enc_xform *exf; int blksz, error, ivlen, r, resid; swa = &ses->swcr_auth; axf = swa->sw_axf; bcopy(swa->sw_ictx, &ctx, axf->ctxsize); blksz = AES_BLOCK_LEN; KASSERT(axf->blocksize == blksz, ("%s: axf block size mismatch", __func__)); swe = &ses->swcr_encdec; exf = swe->sw_exf; KASSERT(axf->blocksize == exf->native_blocksize, ("%s: blocksize mismatch", __func__)); if ((crp->crp_flags & CRYPTO_F_IV_SEPARATE) == 0) return (EINVAL); /* Initialize the IV */ ivlen = AES_CCM_IV_LEN; bcopy(crp->crp_iv, iv, ivlen); /* * AES CCM-CBC-MAC needs to know the length of both the auth * data and payload data before doing the auth computation. */ ctx.aes_cbc_mac_ctx.authDataLength = crp->crp_aad_length; ctx.aes_cbc_mac_ctx.cryptDataLength = crp->crp_payload_length; /* Supply MAC with IV */ axf->Reinit(&ctx, iv, ivlen); /* Supply MAC with AAD */ if (crp->crp_aad != NULL) error = axf->Update(&ctx, crp->crp_aad, crp->crp_aad_length); else error = crypto_apply(crp, crp->crp_aad_start, crp->crp_aad_length, axf->Update, &ctx); if (error) return (error); exf->reinit(swe->sw_kschedule, iv); /* Do encryption/decryption with MAC */ crypto_cursor_init(&cc_in, &crp->crp_buf); crypto_cursor_advance(&cc_in, crp->crp_payload_start); if (CRYPTO_HAS_OUTPUT_BUFFER(crp)) { crypto_cursor_init(&cc_out, &crp->crp_obuf); crypto_cursor_advance(&cc_out, crp->crp_payload_output_start); } else cc_out = cc_in; for (resid = crp->crp_payload_length; resid >= blksz; resid -= blksz) { if (crypto_cursor_seglen(&cc_in) < blksz) { crypto_cursor_copydata(&cc_in, blksz, blk); inblk = blk; } else { inblk = crypto_cursor_segbase(&cc_in); crypto_cursor_advance(&cc_in, blksz); } if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) { if (crypto_cursor_seglen(&cc_out) < blksz) outblk = blk; else outblk = crypto_cursor_segbase(&cc_out); axf->Update(&ctx, inblk, blksz); exf->encrypt(swe->sw_kschedule, inblk, outblk); if (outblk == blk) crypto_cursor_copyback(&cc_out, blksz, blk); else crypto_cursor_advance(&cc_out, blksz); } else { /* * One of the problems with CCM+CBC is that * the authentication is done on the * unencrypted data. As a result, we have to * decrypt the data twice: once to generate * the tag and a second time after the tag is * verified. */ exf->decrypt(swe->sw_kschedule, inblk, blk); axf->Update(&ctx, blk, blksz); } } if (resid > 0) { crypto_cursor_copydata(&cc_in, resid, blk); if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) { axf->Update(&ctx, blk, resid); exf->encrypt_last(swe->sw_kschedule, blk, blk, resid); crypto_cursor_copyback(&cc_out, resid, blk); } else { exf->decrypt_last(swe->sw_kschedule, blk, blk, resid); axf->Update(&ctx, blk, resid); } } /* Finalize MAC */ axf->Final(tag, &ctx); /* Validate tag */ error = 0; if (!CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) { u_char tag2[AES_CBC_MAC_HASH_LEN]; crypto_copydata(crp, crp->crp_digest_start, swa->sw_mlen, tag2); r = timingsafe_bcmp(tag, tag2, swa->sw_mlen); explicit_bzero(tag2, sizeof(tag2)); if (r != 0) { error = EBADMSG; goto out; } /* tag matches, decrypt data */ exf->reinit(swe->sw_kschedule, iv); crypto_cursor_init(&cc_in, &crp->crp_buf); crypto_cursor_advance(&cc_in, crp->crp_payload_start); for (resid = crp->crp_payload_length; resid > blksz; resid -= blksz) { if (crypto_cursor_seglen(&cc_in) < blksz) { crypto_cursor_copydata(&cc_in, blksz, blk); inblk = blk; } else { inblk = crypto_cursor_segbase(&cc_in); crypto_cursor_advance(&cc_in, blksz); } if (crypto_cursor_seglen(&cc_out) < blksz) outblk = blk; else outblk = crypto_cursor_segbase(&cc_out); exf->decrypt(swe->sw_kschedule, inblk, outblk); if (outblk == blk) crypto_cursor_copyback(&cc_out, blksz, blk); else crypto_cursor_advance(&cc_out, blksz); } if (resid > 0) { crypto_cursor_copydata(&cc_in, resid, blk); exf->decrypt_last(swe->sw_kschedule, blk, blk, resid); crypto_cursor_copyback(&cc_out, resid, blk); } } else { /* Inject the authentication data */ crypto_copyback(crp, crp->crp_digest_start, swa->sw_mlen, tag); } out: explicit_bzero(blkbuf, sizeof(blkbuf)); explicit_bzero(tag, sizeof(tag)); explicit_bzero(iv, sizeof(iv)); return (error); } /* * Apply a cipher and a digest to perform EtA. */ static int swcr_eta(struct swcr_session *ses, struct cryptop *crp) { int error; if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op)) { error = swcr_encdec(ses, crp); if (error == 0) error = swcr_authcompute(ses, crp); } else { error = swcr_authcompute(ses, crp); if (error == 0) error = swcr_encdec(ses, crp); } return (error); } /* * Apply a compression/decompression algorithm */ static int swcr_compdec(struct swcr_session *ses, struct cryptop *crp) { u_int8_t *data, *out; struct comp_algo *cxf; int adj; u_int32_t result; cxf = ses->swcr_compdec.sw_cxf; /* We must handle the whole buffer of data in one time * then if there is not all the data in the mbuf, we must * copy in a buffer. */ data = malloc(crp->crp_payload_length, M_CRYPTO_DATA, M_NOWAIT); if (data == NULL) return (EINVAL); crypto_copydata(crp, crp->crp_payload_start, crp->crp_payload_length, data); if (CRYPTO_OP_IS_COMPRESS(crp->crp_op)) result = cxf->compress(data, crp->crp_payload_length, &out); else result = cxf->decompress(data, crp->crp_payload_length, &out); free(data, M_CRYPTO_DATA); if (result == 0) return (EINVAL); crp->crp_olen = result; /* Check the compressed size when doing compression */ if (CRYPTO_OP_IS_COMPRESS(crp->crp_op)) { if (result >= crp->crp_payload_length) { /* Compression was useless, we lost time */ free(out, M_CRYPTO_DATA); return (0); } } /* Copy back the (de)compressed data. m_copyback is * extending the mbuf as necessary. */ crypto_copyback(crp, crp->crp_payload_start, result, out); if (result < crp->crp_payload_length) { switch (crp->crp_buf.cb_type) { case CRYPTO_BUF_MBUF: adj = result - crp->crp_payload_length; m_adj(crp->crp_buf.cb_mbuf, adj); break; case CRYPTO_BUF_UIO: { struct uio *uio = crp->crp_buf.cb_uio; int ind; adj = crp->crp_payload_length - result; ind = uio->uio_iovcnt - 1; while (adj > 0 && ind >= 0) { if (adj < uio->uio_iov[ind].iov_len) { uio->uio_iov[ind].iov_len -= adj; break; } adj -= uio->uio_iov[ind].iov_len; uio->uio_iov[ind].iov_len = 0; ind--; uio->uio_iovcnt--; } } break; default: break; } } free(out, M_CRYPTO_DATA); return 0; } static int swcr_setup_cipher(struct swcr_session *ses, const struct crypto_session_params *csp) { struct swcr_encdec *swe; struct enc_xform *txf; int error; swe = &ses->swcr_encdec; txf = crypto_cipher(csp); MPASS(txf->ivsize == csp->csp_ivlen); if (txf->ctxsize != 0) { swe->sw_kschedule = malloc(txf->ctxsize, M_CRYPTO_DATA, M_NOWAIT); if (swe->sw_kschedule == NULL) return (ENOMEM); } if (csp->csp_cipher_key != NULL) { error = txf->setkey(swe->sw_kschedule, csp->csp_cipher_key, csp->csp_cipher_klen); if (error) return (error); } swe->sw_exf = txf; return (0); } static int swcr_setup_auth(struct swcr_session *ses, const struct crypto_session_params *csp) { struct swcr_auth *swa; struct auth_hash *axf; swa = &ses->swcr_auth; axf = crypto_auth_hash(csp); swa->sw_axf = axf; if (csp->csp_auth_mlen < 0 || csp->csp_auth_mlen > axf->hashsize) return (EINVAL); if (csp->csp_auth_mlen == 0) swa->sw_mlen = axf->hashsize; else swa->sw_mlen = csp->csp_auth_mlen; swa->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA, M_NOWAIT); if (swa->sw_ictx == NULL) return (ENOBUFS); switch (csp->csp_auth_alg) { case CRYPTO_SHA1_HMAC: case CRYPTO_SHA2_224_HMAC: case CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384_HMAC: case CRYPTO_SHA2_512_HMAC: case CRYPTO_NULL_HMAC: case CRYPTO_RIPEMD160_HMAC: swa->sw_octx = malloc(axf->ctxsize, M_CRYPTO_DATA, M_NOWAIT); if (swa->sw_octx == NULL) return (ENOBUFS); if (csp->csp_auth_key != NULL) { swcr_authprepare(axf, swa, csp->csp_auth_key, csp->csp_auth_klen); } if (csp->csp_mode == CSP_MODE_DIGEST) ses->swcr_process = swcr_authcompute; break; case CRYPTO_SHA1: case CRYPTO_SHA2_224: case CRYPTO_SHA2_256: case CRYPTO_SHA2_384: case CRYPTO_SHA2_512: axf->Init(swa->sw_ictx); if (csp->csp_mode == CSP_MODE_DIGEST) ses->swcr_process = swcr_authcompute; break; case CRYPTO_AES_NIST_GMAC: axf->Init(swa->sw_ictx); axf->Setkey(swa->sw_ictx, csp->csp_auth_key, csp->csp_auth_klen); if (csp->csp_mode == CSP_MODE_DIGEST) ses->swcr_process = swcr_gmac; break; case CRYPTO_POLY1305: case CRYPTO_BLAKE2B: case CRYPTO_BLAKE2S: /* * Blake2b and Blake2s support an optional key but do * not require one. */ if (csp->csp_auth_klen == 0 || csp->csp_auth_key != NULL) axf->Setkey(swa->sw_ictx, csp->csp_auth_key, csp->csp_auth_klen); axf->Init(swa->sw_ictx); if (csp->csp_mode == CSP_MODE_DIGEST) ses->swcr_process = swcr_authcompute; break; case CRYPTO_AES_CCM_CBC_MAC: axf->Init(swa->sw_ictx); axf->Setkey(swa->sw_ictx, csp->csp_auth_key, csp->csp_auth_klen); if (csp->csp_mode == CSP_MODE_DIGEST) ses->swcr_process = swcr_ccm_cbc_mac; break; } return (0); } static int swcr_setup_gcm(struct swcr_session *ses, const struct crypto_session_params *csp) { struct swcr_auth *swa; struct auth_hash *axf; if (csp->csp_ivlen != AES_GCM_IV_LEN) return (EINVAL); /* First, setup the auth side. */ swa = &ses->swcr_auth; switch (csp->csp_cipher_klen * 8) { case 128: axf = &auth_hash_nist_gmac_aes_128; break; case 192: axf = &auth_hash_nist_gmac_aes_192; break; case 256: axf = &auth_hash_nist_gmac_aes_256; break; default: return (EINVAL); } swa->sw_axf = axf; if (csp->csp_auth_mlen < 0 || csp->csp_auth_mlen > axf->hashsize) return (EINVAL); if (csp->csp_auth_mlen == 0) swa->sw_mlen = axf->hashsize; else swa->sw_mlen = csp->csp_auth_mlen; swa->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA, M_NOWAIT); if (swa->sw_ictx == NULL) return (ENOBUFS); axf->Init(swa->sw_ictx); if (csp->csp_cipher_key != NULL) axf->Setkey(swa->sw_ictx, csp->csp_cipher_key, csp->csp_cipher_klen); /* Second, setup the cipher side. */ return (swcr_setup_cipher(ses, csp)); } static int swcr_setup_ccm(struct swcr_session *ses, const struct crypto_session_params *csp) { struct swcr_auth *swa; struct auth_hash *axf; if (csp->csp_ivlen != AES_CCM_IV_LEN) return (EINVAL); /* First, setup the auth side. */ swa = &ses->swcr_auth; switch (csp->csp_cipher_klen * 8) { case 128: axf = &auth_hash_ccm_cbc_mac_128; break; case 192: axf = &auth_hash_ccm_cbc_mac_192; break; case 256: axf = &auth_hash_ccm_cbc_mac_256; break; default: return (EINVAL); } swa->sw_axf = axf; if (csp->csp_auth_mlen < 0 || csp->csp_auth_mlen > axf->hashsize) return (EINVAL); if (csp->csp_auth_mlen == 0) swa->sw_mlen = axf->hashsize; else swa->sw_mlen = csp->csp_auth_mlen; swa->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA, M_NOWAIT); if (swa->sw_ictx == NULL) return (ENOBUFS); axf->Init(swa->sw_ictx); if (csp->csp_cipher_key != NULL) axf->Setkey(swa->sw_ictx, csp->csp_cipher_key, csp->csp_cipher_klen); /* Second, setup the cipher side. */ return (swcr_setup_cipher(ses, csp)); } static bool swcr_auth_supported(const struct crypto_session_params *csp) { struct auth_hash *axf; axf = crypto_auth_hash(csp); if (axf == NULL) return (false); switch (csp->csp_auth_alg) { case CRYPTO_SHA1_HMAC: case CRYPTO_SHA2_224_HMAC: case CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384_HMAC: case CRYPTO_SHA2_512_HMAC: case CRYPTO_NULL_HMAC: case CRYPTO_RIPEMD160_HMAC: break; case CRYPTO_AES_NIST_GMAC: switch (csp->csp_auth_klen * 8) { case 128: case 192: case 256: break; default: return (false); } if (csp->csp_auth_key == NULL) return (false); if (csp->csp_ivlen != AES_GCM_IV_LEN) return (false); break; case CRYPTO_POLY1305: if (csp->csp_auth_klen != POLY1305_KEY_LEN) return (false); break; case CRYPTO_AES_CCM_CBC_MAC: switch (csp->csp_auth_klen * 8) { case 128: case 192: case 256: break; default: return (false); } if (csp->csp_auth_key == NULL) return (false); if (csp->csp_ivlen != AES_CCM_IV_LEN) return (false); break; } return (true); } static bool swcr_cipher_supported(const struct crypto_session_params *csp) { struct enc_xform *txf; txf = crypto_cipher(csp); if (txf == NULL) return (false); if (csp->csp_cipher_alg != CRYPTO_NULL_CBC && txf->ivsize != csp->csp_ivlen) return (false); return (true); } static int swcr_probesession(device_t dev, const struct crypto_session_params *csp) { if ((csp->csp_flags & ~(CSP_F_SEPARATE_OUTPUT | CSP_F_SEPARATE_AAD)) != 0) return (EINVAL); switch (csp->csp_mode) { case CSP_MODE_COMPRESS: switch (csp->csp_cipher_alg) { case CRYPTO_DEFLATE_COMP: break; default: return (EINVAL); } break; case CSP_MODE_CIPHER: switch (csp->csp_cipher_alg) { case CRYPTO_AES_NIST_GCM_16: case CRYPTO_AES_CCM_16: return (EINVAL); default: if (!swcr_cipher_supported(csp)) return (EINVAL); break; } break; case CSP_MODE_DIGEST: if (!swcr_auth_supported(csp)) return (EINVAL); break; case CSP_MODE_AEAD: switch (csp->csp_cipher_alg) { case CRYPTO_AES_NIST_GCM_16: case CRYPTO_AES_CCM_16: break; default: return (EINVAL); } break; case CSP_MODE_ETA: /* AEAD algorithms cannot be used for EtA. */ switch (csp->csp_cipher_alg) { case CRYPTO_AES_NIST_GCM_16: case CRYPTO_AES_CCM_16: return (EINVAL); } switch (csp->csp_auth_alg) { case CRYPTO_AES_NIST_GMAC: case CRYPTO_AES_CCM_CBC_MAC: return (EINVAL); } if (!swcr_cipher_supported(csp) || !swcr_auth_supported(csp)) return (EINVAL); break; default: return (EINVAL); } return (CRYPTODEV_PROBE_SOFTWARE); } /* * Generate a new software session. */ static int swcr_newsession(device_t dev, crypto_session_t cses, const struct crypto_session_params *csp) { struct swcr_session *ses; struct swcr_encdec *swe; struct swcr_auth *swa; struct comp_algo *cxf; int error; ses = crypto_get_driver_session(cses); mtx_init(&ses->swcr_lock, "swcr session lock", NULL, MTX_DEF); error = 0; swe = &ses->swcr_encdec; swa = &ses->swcr_auth; switch (csp->csp_mode) { case CSP_MODE_COMPRESS: switch (csp->csp_cipher_alg) { case CRYPTO_DEFLATE_COMP: cxf = &comp_algo_deflate; break; #ifdef INVARIANTS default: panic("bad compression algo"); #endif } ses->swcr_compdec.sw_cxf = cxf; ses->swcr_process = swcr_compdec; break; case CSP_MODE_CIPHER: switch (csp->csp_cipher_alg) { case CRYPTO_NULL_CBC: ses->swcr_process = swcr_null; break; #ifdef INVARIANTS case CRYPTO_AES_NIST_GCM_16: case CRYPTO_AES_CCM_16: panic("bad cipher algo"); #endif default: error = swcr_setup_cipher(ses, csp); if (error == 0) ses->swcr_process = swcr_encdec; } break; case CSP_MODE_DIGEST: error = swcr_setup_auth(ses, csp); break; case CSP_MODE_AEAD: switch (csp->csp_cipher_alg) { case CRYPTO_AES_NIST_GCM_16: error = swcr_setup_gcm(ses, csp); if (error == 0) ses->swcr_process = swcr_gcm; break; case CRYPTO_AES_CCM_16: error = swcr_setup_ccm(ses, csp); if (error == 0) ses->swcr_process = swcr_ccm; break; #ifdef INVARIANTS default: panic("bad aead algo"); #endif } break; case CSP_MODE_ETA: #ifdef INVARIANTS switch (csp->csp_cipher_alg) { case CRYPTO_AES_NIST_GCM_16: case CRYPTO_AES_CCM_16: panic("bad eta cipher algo"); } switch (csp->csp_auth_alg) { case CRYPTO_AES_NIST_GMAC: case CRYPTO_AES_CCM_CBC_MAC: panic("bad eta auth algo"); } #endif error = swcr_setup_auth(ses, csp); if (error) break; if (csp->csp_cipher_alg == CRYPTO_NULL_CBC) { /* Effectively degrade to digest mode. */ ses->swcr_process = swcr_authcompute; break; } error = swcr_setup_cipher(ses, csp); if (error == 0) ses->swcr_process = swcr_eta; break; default: error = EINVAL; } if (error) swcr_freesession(dev, cses); return (error); } static void swcr_freesession(device_t dev, crypto_session_t cses) { struct swcr_session *ses; - struct swcr_auth *swa; - struct auth_hash *axf; ses = crypto_get_driver_session(cses); mtx_destroy(&ses->swcr_lock); zfree(ses->swcr_encdec.sw_kschedule, M_CRYPTO_DATA); - - axf = ses->swcr_auth.sw_axf; - if (axf != NULL) { - swa = &ses->swcr_auth; - if (swa->sw_ictx != NULL) { - explicit_bzero(swa->sw_ictx, axf->ctxsize); - free(swa->sw_ictx, M_CRYPTO_DATA); - } - if (swa->sw_octx != NULL) { - explicit_bzero(swa->sw_octx, axf->ctxsize); - free(swa->sw_octx, M_CRYPTO_DATA); - } - } + zfree(ses->swcr_auth.sw_ictx, M_CRYPTO_DATA); + zfree(ses->swcr_auth.sw_octx, M_CRYPTO_DATA); } /* * Process a software request. */ static int swcr_process(device_t dev, struct cryptop *crp, int hint) { struct swcr_session *ses; ses = crypto_get_driver_session(crp->crp_session); mtx_lock(&ses->swcr_lock); crp->crp_etype = ses->swcr_process(ses, crp); mtx_unlock(&ses->swcr_lock); crypto_done(crp); return (0); } static void swcr_identify(driver_t *drv, device_t parent) { /* NB: order 10 is so we get attached after h/w devices */ if (device_find_child(parent, "cryptosoft", -1) == NULL && BUS_ADD_CHILD(parent, 10, "cryptosoft", 0) == 0) panic("cryptosoft: could not attach"); } static int swcr_probe(device_t dev) { device_set_desc(dev, "software crypto"); return (BUS_PROBE_NOWILDCARD); } static int swcr_attach(device_t dev) { swcr_id = crypto_get_driverid(dev, sizeof(struct swcr_session), CRYPTOCAP_F_SOFTWARE | CRYPTOCAP_F_SYNC); if (swcr_id < 0) { device_printf(dev, "cannot initialize!"); return (ENXIO); } return (0); } static int swcr_detach(device_t dev) { crypto_unregister_all(swcr_id); return 0; } static device_method_t swcr_methods[] = { DEVMETHOD(device_identify, swcr_identify), DEVMETHOD(device_probe, swcr_probe), DEVMETHOD(device_attach, swcr_attach), DEVMETHOD(device_detach, swcr_detach), DEVMETHOD(cryptodev_probesession, swcr_probesession), DEVMETHOD(cryptodev_newsession, swcr_newsession), DEVMETHOD(cryptodev_freesession,swcr_freesession), DEVMETHOD(cryptodev_process, swcr_process), {0, 0}, }; static driver_t swcr_driver = { "cryptosoft", swcr_methods, 0, /* NB: no softc */ }; static devclass_t swcr_devclass; /* * NB: We explicitly reference the crypto module so we * get the necessary ordering when built as a loadable * module. This is required because we bundle the crypto * module code together with the cryptosoft driver (otherwise * normal module dependencies would handle things). */ extern int crypto_modevent(struct module *, int, void *); /* XXX where to attach */ DRIVER_MODULE(cryptosoft, nexus, swcr_driver, swcr_devclass, crypto_modevent,0); MODULE_VERSION(cryptosoft, 1); MODULE_DEPEND(cryptosoft, crypto, 1, 1, 1); Index: head/sys/opencrypto/ktls_ocf.c =================================================================== --- head/sys/opencrypto/ktls_ocf.c (revision 362623) +++ head/sys/opencrypto/ktls_ocf.c (revision 362624) @@ -1,447 +1,446 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2019 Netflix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include struct ocf_session { crypto_session_t sid; struct mtx lock; }; struct ocf_operation { struct ocf_session *os; bool done; struct iovec iov[0]; }; static MALLOC_DEFINE(M_KTLS_OCF, "ktls_ocf", "OCF KTLS"); SYSCTL_DECL(_kern_ipc_tls); SYSCTL_DECL(_kern_ipc_tls_stats); static SYSCTL_NODE(_kern_ipc_tls_stats, OID_AUTO, ocf, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Kernel TLS offload via OCF stats"); static counter_u64_t ocf_tls12_gcm_crypts; SYSCTL_COUNTER_U64(_kern_ipc_tls_stats_ocf, OID_AUTO, tls12_gcm_crypts, CTLFLAG_RD, &ocf_tls12_gcm_crypts, "Total number of OCF TLS 1.2 GCM encryption operations"); static counter_u64_t ocf_tls13_gcm_crypts; SYSCTL_COUNTER_U64(_kern_ipc_tls_stats_ocf, OID_AUTO, tls13_gcm_crypts, CTLFLAG_RD, &ocf_tls13_gcm_crypts, "Total number of OCF TLS 1.3 GCM encryption operations"); static counter_u64_t ocf_inplace; SYSCTL_COUNTER_U64(_kern_ipc_tls_stats_ocf, OID_AUTO, inplace, CTLFLAG_RD, &ocf_inplace, "Total number of OCF in-place operations"); static counter_u64_t ocf_separate_output; SYSCTL_COUNTER_U64(_kern_ipc_tls_stats_ocf, OID_AUTO, separate_output, CTLFLAG_RD, &ocf_separate_output, "Total number of OCF operations with a separate output buffer"); static counter_u64_t ocf_retries; SYSCTL_COUNTER_U64(_kern_ipc_tls_stats_ocf, OID_AUTO, retries, CTLFLAG_RD, &ocf_retries, "Number of OCF encryption operation retries"); static int ktls_ocf_callback(struct cryptop *crp) { struct ocf_operation *oo; oo = crp->crp_opaque; mtx_lock(&oo->os->lock); oo->done = true; mtx_unlock(&oo->os->lock); wakeup(oo); return (0); } static int ktls_ocf_tls12_gcm_encrypt(struct ktls_session *tls, const struct tls_record_layer *hdr, uint8_t *trailer, struct iovec *iniov, struct iovec *outiov, int iovcnt, uint64_t seqno, uint8_t record_type __unused) { struct uio uio, out_uio, *tag_uio; struct tls_aead_data ad; struct cryptop *crp; struct ocf_session *os; struct ocf_operation *oo; int i, error; uint16_t tls_comp_len; bool inplace; os = tls->cipher; oo = malloc(sizeof(*oo) + (iovcnt + 1) * sizeof(struct iovec), M_KTLS_OCF, M_WAITOK | M_ZERO); oo->os = os; uio.uio_iov = iniov; uio.uio_iovcnt = iovcnt; uio.uio_offset = 0; uio.uio_segflg = UIO_SYSSPACE; uio.uio_td = curthread; out_uio.uio_iov = outiov; out_uio.uio_iovcnt = iovcnt; out_uio.uio_offset = 0; out_uio.uio_segflg = UIO_SYSSPACE; out_uio.uio_td = curthread; crp = crypto_getreq(os->sid, M_WAITOK); /* Setup the IV. */ memcpy(crp->crp_iv, tls->params.iv, TLS_AEAD_GCM_LEN); memcpy(crp->crp_iv + TLS_AEAD_GCM_LEN, hdr + 1, sizeof(uint64_t)); /* Setup the AAD. */ tls_comp_len = ntohs(hdr->tls_length) - (AES_GMAC_HASH_LEN + sizeof(uint64_t)); ad.seq = htobe64(seqno); ad.type = hdr->tls_type; ad.tls_vmajor = hdr->tls_vmajor; ad.tls_vminor = hdr->tls_vminor; ad.tls_length = htons(tls_comp_len); crp->crp_aad = &ad; crp->crp_aad_length = sizeof(ad); /* Compute payload length and determine if encryption is in place. */ inplace = true; crp->crp_payload_start = 0; for (i = 0; i < iovcnt; i++) { if (iniov[i].iov_base != outiov[i].iov_base) inplace = false; crp->crp_payload_length += iniov[i].iov_len; } uio.uio_resid = crp->crp_payload_length; out_uio.uio_resid = crp->crp_payload_length; if (inplace) tag_uio = &uio; else tag_uio = &out_uio; /* Duplicate iovec and append vector for tag. */ memcpy(oo->iov, tag_uio->uio_iov, iovcnt * sizeof(struct iovec)); tag_uio->uio_iov = oo->iov; tag_uio->uio_iov[iovcnt].iov_base = trailer; tag_uio->uio_iov[iovcnt].iov_len = AES_GMAC_HASH_LEN; tag_uio->uio_iovcnt++; crp->crp_digest_start = tag_uio->uio_resid; tag_uio->uio_resid += AES_GMAC_HASH_LEN; crp->crp_op = CRYPTO_OP_ENCRYPT | CRYPTO_OP_COMPUTE_DIGEST; crp->crp_flags = CRYPTO_F_CBIMM | CRYPTO_F_IV_SEPARATE; crypto_use_uio(crp, &uio); if (!inplace) crypto_use_output_uio(crp, &out_uio); crp->crp_opaque = oo; crp->crp_callback = ktls_ocf_callback; counter_u64_add(ocf_tls12_gcm_crypts, 1); if (inplace) counter_u64_add(ocf_inplace, 1); else counter_u64_add(ocf_separate_output, 1); for (;;) { error = crypto_dispatch(crp); if (error) break; mtx_lock(&os->lock); while (!oo->done) mtx_sleep(oo, &os->lock, 0, "ocfktls", 0); mtx_unlock(&os->lock); if (crp->crp_etype != EAGAIN) { error = crp->crp_etype; break; } crp->crp_etype = 0; crp->crp_flags &= ~CRYPTO_F_DONE; oo->done = false; counter_u64_add(ocf_retries, 1); } crypto_freereq(crp); free(oo, M_KTLS_OCF); return (error); } static int ktls_ocf_tls13_gcm_encrypt(struct ktls_session *tls, const struct tls_record_layer *hdr, uint8_t *trailer, struct iovec *iniov, struct iovec *outiov, int iovcnt, uint64_t seqno, uint8_t record_type) { struct uio uio, out_uio; struct tls_aead_data_13 ad; char nonce[12]; struct cryptop *crp; struct ocf_session *os; struct ocf_operation *oo; struct iovec *iov, *out_iov; int i, error; bool inplace; os = tls->cipher; oo = malloc(sizeof(*oo) + (iovcnt + 1) * sizeof(*iov) * 2, M_KTLS_OCF, M_WAITOK | M_ZERO); oo->os = os; iov = oo->iov; out_iov = iov + iovcnt + 2; crp = crypto_getreq(os->sid, M_WAITOK); /* Setup the nonce. */ memcpy(nonce, tls->params.iv, tls->params.iv_len); *(uint64_t *)(nonce + 4) ^= htobe64(seqno); /* Setup the AAD. */ ad.type = hdr->tls_type; ad.tls_vmajor = hdr->tls_vmajor; ad.tls_vminor = hdr->tls_vminor; ad.tls_length = hdr->tls_length; crp->crp_aad = &ad; crp->crp_aad_length = sizeof(ad); /* Compute payload length and determine if encryption is in place. */ inplace = true; crp->crp_payload_start = 0; for (i = 0; i < iovcnt; i++) { if (iniov[i].iov_base != outiov[i].iov_base) inplace = false; crp->crp_payload_length += iniov[i].iov_len; } /* Store the record type as the first byte of the trailer. */ trailer[0] = record_type; crp->crp_payload_length++; crp->crp_digest_start = crp->crp_payload_length; /* * Duplicate the input iov to append the trailer. Always * include the full trailer as input to get the record_type * even if only the first byte is used. */ memcpy(iov, iniov, iovcnt * sizeof(*iov)); iov[iovcnt].iov_base = trailer; iov[iovcnt].iov_len = AES_GMAC_HASH_LEN + 1; uio.uio_iov = iov; uio.uio_iovcnt = iovcnt + 1; uio.uio_offset = 0; uio.uio_resid = crp->crp_payload_length + AES_GMAC_HASH_LEN; uio.uio_segflg = UIO_SYSSPACE; uio.uio_td = curthread; crypto_use_uio(crp, &uio); if (!inplace) { /* Duplicate the output iov to append the trailer. */ memcpy(out_iov, outiov, iovcnt * sizeof(*out_iov)); out_iov[iovcnt] = iov[iovcnt]; out_uio.uio_iov = out_iov; out_uio.uio_iovcnt = iovcnt + 1; out_uio.uio_offset = 0; out_uio.uio_resid = crp->crp_payload_length + AES_GMAC_HASH_LEN; out_uio.uio_segflg = UIO_SYSSPACE; out_uio.uio_td = curthread; crypto_use_output_uio(crp, &out_uio); } crp->crp_op = CRYPTO_OP_ENCRYPT | CRYPTO_OP_COMPUTE_DIGEST; crp->crp_flags = CRYPTO_F_CBIMM | CRYPTO_F_IV_SEPARATE; crp->crp_opaque = oo; crp->crp_callback = ktls_ocf_callback; memcpy(crp->crp_iv, nonce, sizeof(nonce)); counter_u64_add(ocf_tls13_gcm_crypts, 1); if (inplace) counter_u64_add(ocf_inplace, 1); else counter_u64_add(ocf_separate_output, 1); for (;;) { error = crypto_dispatch(crp); if (error) break; mtx_lock(&os->lock); while (!oo->done) mtx_sleep(oo, &os->lock, 0, "ocfktls", 0); mtx_unlock(&os->lock); if (crp->crp_etype != EAGAIN) { error = crp->crp_etype; break; } crp->crp_etype = 0; crp->crp_flags &= ~CRYPTO_F_DONE; oo->done = false; counter_u64_add(ocf_retries, 1); } crypto_freereq(crp); free(oo, M_KTLS_OCF); return (error); } static void ktls_ocf_free(struct ktls_session *tls) { struct ocf_session *os; os = tls->cipher; crypto_freesession(os->sid); mtx_destroy(&os->lock); - explicit_bzero(os, sizeof(*os)); - free(os, M_KTLS_OCF); + zfree(os, M_KTLS_OCF); } static int ktls_ocf_try(struct socket *so, struct ktls_session *tls) { struct crypto_session_params csp; struct ocf_session *os; int error; memset(&csp, 0, sizeof(csp)); csp.csp_flags |= CSP_F_SEPARATE_OUTPUT | CSP_F_SEPARATE_AAD; switch (tls->params.cipher_algorithm) { case CRYPTO_AES_NIST_GCM_16: switch (tls->params.cipher_key_len) { case 128 / 8: case 256 / 8: break; default: return (EINVAL); } csp.csp_mode = CSP_MODE_AEAD; csp.csp_cipher_alg = CRYPTO_AES_NIST_GCM_16; csp.csp_cipher_key = tls->params.cipher_key; csp.csp_cipher_klen = tls->params.cipher_key_len; csp.csp_ivlen = AES_GCM_IV_LEN; break; default: return (EPROTONOSUPPORT); } /* Only TLS 1.2 and 1.3 are supported. */ if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE || tls->params.tls_vminor < TLS_MINOR_VER_TWO || tls->params.tls_vminor > TLS_MINOR_VER_THREE) return (EPROTONOSUPPORT); os = malloc(sizeof(*os), M_KTLS_OCF, M_NOWAIT | M_ZERO); if (os == NULL) return (ENOMEM); error = crypto_newsession(&os->sid, &csp, CRYPTO_FLAG_HARDWARE | CRYPTO_FLAG_SOFTWARE); if (error) { free(os, M_KTLS_OCF); return (error); } mtx_init(&os->lock, "ktls_ocf", NULL, MTX_DEF); tls->cipher = os; if (tls->params.tls_vminor == TLS_MINOR_VER_THREE) tls->sw_encrypt = ktls_ocf_tls13_gcm_encrypt; else tls->sw_encrypt = ktls_ocf_tls12_gcm_encrypt; tls->free = ktls_ocf_free; return (0); } struct ktls_crypto_backend ocf_backend = { .name = "OCF", .prio = 5, .api_version = KTLS_API_VERSION, .try = ktls_ocf_try, }; static int ktls_ocf_modevent(module_t mod, int what, void *arg) { int error; switch (what) { case MOD_LOAD: ocf_tls12_gcm_crypts = counter_u64_alloc(M_WAITOK); ocf_tls13_gcm_crypts = counter_u64_alloc(M_WAITOK); ocf_inplace = counter_u64_alloc(M_WAITOK); ocf_separate_output = counter_u64_alloc(M_WAITOK); ocf_retries = counter_u64_alloc(M_WAITOK); return (ktls_crypto_backend_register(&ocf_backend)); case MOD_UNLOAD: error = ktls_crypto_backend_deregister(&ocf_backend); if (error) return (error); counter_u64_free(ocf_tls12_gcm_crypts); counter_u64_free(ocf_tls13_gcm_crypts); counter_u64_free(ocf_inplace); counter_u64_free(ocf_separate_output); counter_u64_free(ocf_retries); return (0); default: return (EOPNOTSUPP); } } static moduledata_t ktls_ocf_moduledata = { "ktls_ocf", ktls_ocf_modevent, NULL }; DECLARE_MODULE(ktls_ocf, ktls_ocf_moduledata, SI_SUB_PROTO_END, SI_ORDER_ANY);