Changeset View
Standalone View
sys/kern/kern_shutdown.c
Context not available. | |||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include "opt_ddb.h" | #include "opt_ddb.h" | ||||
#include "opt_ekcd.h" | |||||
#include "opt_kdb.h" | #include "opt_kdb.h" | ||||
#include "opt_panic.h" | #include "opt_panic.h" | ||||
#include "opt_sched.h" | #include "opt_sched.h" | ||||
Context not available. | |||||
#include <sys/vnode.h> | #include <sys/vnode.h> | ||||
#include <sys/watchdog.h> | #include <sys/watchdog.h> | ||||
#include <crypto/rijndael/rijndael-api-fst.h> | |||||
#include <crypto/sha2/sha2.h> | |||||
#include <ddb/ddb.h> | #include <ddb/ddb.h> | ||||
#include <machine/cpu.h> | #include <machine/cpu.h> | ||||
Context not available. | |||||
SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW, | SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW, | ||||
&suspend_blocked, 0, "Block suspend due to a pending shutdown"); | &suspend_blocked, 0, "Block suspend due to a pending shutdown"); | ||||
#ifdef EKCD | |||||
FEATURE(ekcd, "Encrypted kernel crash dumps support"); | |||||
MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data"); | |||||
struct kerneldumpcrypto { | |||||
uint8_t kdc_encryption; | |||||
uint8_t kdc_iv[KERNELDUMP_IV_MAX_SIZE]; | |||||
keyInstance kdc_ki; | |||||
cipherInstance kdc_ci; | |||||
off_t kdc_nextoffset; | |||||
uint32_t kdc_dumpkeysize; | |||||
uint32_t kdc_encryptedkeysize; | |||||
uint8_t kdc_encryptedkey[]; | |||||
}; | |||||
#endif | |||||
/* | /* | ||||
* Variable panicstr contains argument to first call to panic; used as flag | * Variable panicstr contains argument to first call to panic; used as flag | ||||
* to indicate that the kernel has already called panic. | * to indicate that the kernel has already called panic. | ||||
Context not available. | |||||
SYSCTL_STRING(_kern_shutdown, OID_AUTO, dumpdevname, CTLFLAG_RD, | SYSCTL_STRING(_kern_shutdown, OID_AUTO, dumpdevname, CTLFLAG_RD, | ||||
dumpdevname, 0, "Device for kernel dumps"); | dumpdevname, 0, "Device for kernel dumps"); | ||||
#ifdef EKCD | |||||
static struct kerneldumpcrypto * | |||||
kerneldumpcrypto_create(uint8_t encryption, const uint8_t *key, | |||||
uint32_t encryptedkeysize, const uint8_t *encryptedkey) | |||||
{ | |||||
struct kerneldumpcrypto *kdc; | |||||
kdc = malloc(sizeof(*kdc) + encryptedkeysize, M_EKCD, | |||||
M_WAITOK | M_ZERO); | |||||
arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0); | |||||
cem: `dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize);` | |||||
kdc->kdc_encryption = encryption; | |||||
Done Inline ActionsDo we care if dumpkey is block aligned (kdc isn't likely to be block sized)? cem: Do we care if dumpkey is block aligned (kdc isn't likely to be block sized)? | |||||
Not Done Inline Actionskdc_dumpkey has struct kerneldumpkey[] type now. def: kdc_dumpkey has struct kerneldumpkey[] type now. | |||||
switch (kdc->kdc_encryption) { | |||||
case KERNELDUMP_ENC_AES_256_CBC: | |||||
if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0) | |||||
goto failed; | |||||
break; | |||||
default: | |||||
goto failed; | |||||
} | |||||
kdc->kdc_encryptedkeysize = encryptedkeysize; | |||||
bcopy(encryptedkey, kdc->kdc_encryptedkey, kdc->kdc_encryptedkeysize); | |||||
kdc->kdc_dumpkeysize = (sizeof(kdc->kdc_encryption) + | |||||
sizeof(kdc->kdc_iv) + sizeof(kdc->kdc_encryptedkeysize) + | |||||
kdc->kdc_encryptedkeysize + KERNELDUMP_BLOCK_SIZE - 1) / | |||||
Done Inline ActionsThis alias increases the alignment requirements and may produce bus errors outside of x86, I think. Instead, maybe make the type of the kdc_dumpkey array struct kerneldumpkey[] so that the struct member is suitably aligned. cem: This alias increases the alignment requirements and may produce bus errors outside of x86, I… | |||||
KERNELDUMP_BLOCK_SIZE * KERNELDUMP_BLOCK_SIZE; | |||||
return (kdc); | |||||
failed: | |||||
explicit_bzero(kdc, sizeof(*kdc) + encryptedkeysize); | |||||
free(kdc, M_EKCD); | |||||
return (NULL); | |||||
} | |||||
#endif /* EKCD */ | |||||
int | |||||
kerneldumpcrypto_init(struct kerneldumpcrypto *kdc) | |||||
{ | |||||
#ifndef EKCD | |||||
return (0); | |||||
#else | |||||
uint8_t hash[SHA256_DIGEST_LENGTH]; | |||||
SHA256_CTX ctx; | |||||
int error; | |||||
error = 0; | |||||
if (kdc == NULL) | |||||
return (0); | |||||
/* | |||||
* A user who enters ddb can write a crash dump multiple times. | |||||
* Each dump should be encrypted using a different IV. | |||||
*/ | |||||
SHA256_Init(&ctx); | |||||
SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv)); | |||||
SHA256_Final(hash, &ctx); | |||||
bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv)); | |||||
cemUnsubmitted Not Done Inline ActionsWhy should it use a different IV (maybe for network dumps, which we don't support today)? And if it should use a different IV, why make that subsequent IV predictable (sha256 of the previous one) instead of arc4random()? cem: Why should it use a different IV (maybe for network dumps, which we don't support today)? And… | |||||
pjdUnsubmitted Not Done Inline ActionsWe don't want to reuse the same IV, because a user may enter DDB, write a crash dump, change the dump device and write it again. We would end up with two crash dumps encrypted with the same IV. Even if the user doesn't switch the dump device, another user may read its content after the first dump and before the second dump. I also remember there was a reason to use SHA256 instead of arc4random(). Maybe arc4random() needs the scheduler? pjd: We don't want to reuse the same IV, because a user may enter DDB, write a crash dump, change the dump device and… | |||||
switch (kdc->kdc_encryption) { | |||||
case KERNELDUMP_ENC_AES_256_CBC: | |||||
if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, | |||||
kdc->kdc_iv) <= 0) { | |||||
error = EINVAL; | |||||
goto failed; | |||||
} | |||||
break; | |||||
default: | |||||
error = EINVAL; | |||||
goto failed; | |||||
} | |||||
kdc->kdc_nextoffset = 0; | |||||
failed: | |||||
cemUnsubmitted Done Inline ActionsGiven this is also the successful exit path, maybe rename failed label to just out. cem: Given this is also the successful exit path, maybe rename `failed` label to just `out`. | |||||
explicit_bzero(hash, sizeof(hash)); | |||||
return (error); | |||||
#endif | |||||
} | |||||
/*
 * Report the dump key size recorded in a crypto context.
 *
 * Returns kdc->kdc_dumpkeysize, or 0 when kdc is NULL or the kernel was
 * built without the EKCD option.
 */
uint32_t
kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc)
{
#ifdef EKCD
	if (kdc != NULL)
		return (kdc->kdc_dumpkeysize);
#endif
	return (0);
}
/* Registration of dumpers */ | /* Registration of dumpers */ | ||||
int | int | ||||
set_dumper(struct dumperinfo *di, const char *devname, struct thread *td) | set_dumper(struct dumperinfo *di, const char *devname, struct thread *td, | ||||
uint8_t encryption, const uint8_t *key, uint32_t encryptedkeysize, | |||||
const uint8_t *encryptedkey) | |||||
{ | { | ||||
size_t wantcopy; | size_t wantcopy; | ||||
int error; | int error; | ||||
Context not available. | |||||
return (error); | return (error); | ||||
if (di == NULL) { | if (di == NULL) { | ||||
bzero(&dumper, sizeof dumper); | error = 0; | ||||
dumpdevname[0] = '\0'; | goto cleanup; | ||||
return (0); | |||||
} | } | ||||
if (dumper.dumper != NULL) | if (dumper.dumper != NULL) | ||||
return (EBUSY); | return (EBUSY); | ||||
dumper = *di; | dumper = *di; | ||||
dumper.kdc = NULL; | |||||
if (encryption != KERNELDUMP_ENC_NONE) { | |||||
#ifdef EKCD | |||||
dumper.kdc = kerneldumpcrypto_create(encryption, key, | |||||
encryptedkeysize, encryptedkey); | |||||
if (dumper.kdc == NULL) { | |||||
error = EINVAL; | |||||
goto cleanup; | |||||
} | |||||
#else | |||||
error = EOPNOTSUPP; | |||||
goto cleanup; | |||||
#endif | |||||
} | |||||
wantcopy = strlcpy(dumpdevname, devname, sizeof(dumpdevname)); | wantcopy = strlcpy(dumpdevname, devname, sizeof(dumpdevname)); | ||||
if (wantcopy >= sizeof(dumpdevname)) { | if (wantcopy >= sizeof(dumpdevname)) { | ||||
printf("set_dumper: device name truncated from '%s' -> '%s'\n", | printf("set_dumper: device name truncated from '%s' -> '%s'\n", | ||||
devname, dumpdevname); | devname, dumpdevname); | ||||
} | } | ||||
return (0); | return (0); | ||||
cleanup: | |||||
#ifdef EKCD | |||||
if (dumper.kdc != NULL) { | |||||
explicit_bzero(dumper.kdc, sizeof(*dumper.kdc) + | |||||
dumper.kdc->kdc_encryptedkeysize); | |||||
free(dumper.kdc, M_EKCD); | |||||
} | |||||
#endif | |||||
explicit_bzero(&dumper, sizeof(dumper)); | |||||
dumpdevname[0] = '\0'; | |||||
return (error); | |||||
} | } | ||||
/* Call dumper with bounds checking. */ | static int | ||||
int | dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length) | ||||
dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, | |||||
off_t offset, size_t length) | |||||
{ | { | ||||
if (length != 0 && (offset < di->mediaoffset || | if (length != 0 && (offset < di->mediaoffset || | ||||
Context not available. | |||||
(uintmax_t)length, (intmax_t)di->mediasize); | (uintmax_t)length, (intmax_t)di->mediasize); | ||||
return (ENOSPC); | return (ENOSPC); | ||||
} | } | ||||
return (0); | |||||
} | |||||
#ifdef EKCD | |||||
static int | |||||
dump_encrypt(struct kerneldumpcrypto *kdc, const uint8_t *src, uint8_t *dst, | |||||
size_t size) | |||||
{ | |||||
switch (kdc->kdc_encryption) { | |||||
case KERNELDUMP_ENC_AES_256_CBC: | |||||
if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, src, | |||||
8 * size, dst) <= 0) { | |||||
return (EIO); | |||||
} | |||||
if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, | |||||
dst + size - 16 /* IV size for AES-256-CBC */) <= 0) { | |||||
return (EIO); | |||||
} | |||||
break; | |||||
default: | |||||
return (EINVAL); | |||||
} | |||||
return (0); | |||||
} | |||||
/* Encrypt data and call dumper. */ | |||||
static int | |||||
dump_encrypted_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, | |||||
off_t offset, size_t length) | |||||
{ | |||||
uint8_t buf[2 * KERNELDUMP_BLOCK_SIZE]; | |||||
cemUnsubmitted Done Inline ActionsMaybe make this static. cem: Maybe make this `static`. | |||||
struct kerneldumpcrypto *kdc; | |||||
int error; | |||||
size_t nbytes; | |||||
off_t nextoffset; | |||||
kdc = di->kdc; | |||||
error = dump_check_bounds(di, offset, length); | |||||
if (error != 0) | |||||
return (error); | |||||
/* Signal completion. */ | |||||
if (virtual == NULL && physical == 0 && offset == 0 && length == 0) { | |||||
return (di->dumper(di->priv, virtual, physical, offset, | |||||
length)); | |||||
} | |||||
/* Data have to be aligned to block size. */ | |||||
if ((length % KERNELDUMP_BLOCK_SIZE) != 0) | |||||
return (EINVAL); | |||||
/* | |||||
* Data have to be written continuously because we're encrypting using | |||||
* CBC mode which has this assumption. | |||||
*/ | |||||
if (kdc->kdc_nextoffset != 0 && kdc->kdc_nextoffset != offset) | |||||
return (EINVAL); | |||||
nextoffset = offset + (off_t)length; | |||||
while (length > 0) { | |||||
if (length >= sizeof(buf)) | |||||
nbytes = sizeof(buf); | |||||
else | |||||
nbytes = length; | |||||
cemUnsubmitted Done Inline Actionsnbytes = MIN(length, sizeof(buf)); cem: nbytes = MIN(length, sizeof(buf)); | |||||
bcopy(virtual, buf, nbytes); | |||||
if (dump_encrypt(kdc, buf, buf, nbytes) != 0) | |||||
cemUnsubmitted Done Inline ActionsDoes rijndael_blockEncrypt really support src == dst? Why have dump_encrypt take a src and dst parameter at all if we're only going to use it with both equal? cem: Does `rijndael_blockEncrypt` really support src == dst?
Why have dump_encrypt take a src and… | |||||
defAuthorUnsubmitted Done Inline ActionsYes, rijndael_blockEncrypt supports it. It's already used in gbde(8) in g_bde_crypt_delete (sys/geom/bde/g_bde_crypt.c) via AES_encrypt. def: Yes, rijndael_blockEncrypt supports it. It's already used in gbde(8) in g_bde_crypt_delete… | |||||
cemUnsubmitted Done Inline Actions
cem: > Why have dump_encrypt take a src and dst parameter at all if we're only going to use it with… | |||||
cemUnsubmitted Done Inline ActionsNevermind, I see dump_encrypt() was changed to only take the one pointer. Sorry about that. cem: Nevermind, I see dump_encrypt() was changed to only take the one pointer. Sorry about that. | |||||
return (EIO); | |||||
error = di->dumper(di->priv, buf, physical, offset, nbytes); | |||||
if (error != 0) | |||||
return (error); | |||||
offset += nbytes; | |||||
virtual = (void *)((uint8_t *)virtual + nbytes); | |||||
length -= nbytes; | |||||
} | |||||
kdc->kdc_nextoffset = nextoffset; | |||||
return (0); | |||||
} | |||||
#endif /* EKCD */ | |||||
/* Call dumper with bounds checking. */ | |||||
static int | |||||
dump_raw_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, | |||||
off_t offset, size_t length) | |||||
{ | |||||
int error; | |||||
error = dump_check_bounds(di, offset, length); | |||||
if (error != 0) | |||||
return (error); | |||||
return (di->dumper(di->priv, virtual, physical, offset, length)); | return (di->dumper(di->priv, virtual, physical, offset, length)); | ||||
} | } | ||||
int | |||||
dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, | |||||
off_t offset, size_t length) | |||||
{ | |||||
#ifdef EKCD | |||||
if (di->kdc != NULL) { | |||||
return (dump_encrypted_write(di, virtual, physical, offset, | |||||
length)); | |||||
} | |||||
#endif | |||||
return (dump_raw_write(di, virtual, physical, offset, length)); | |||||
} | |||||
int | |||||
dump_write_header(struct dumperinfo *di, struct kerneldumpheader *kdh, | |||||
vm_offset_t physical, off_t offset) | |||||
{ | |||||
return (dump_raw_write(di, kdh, physical, offset, sizeof(*kdh))); | |||||
} | |||||
int | |||||
dump_write_key(struct dumperinfo *di, vm_offset_t physical, off_t offset) | |||||
{ | |||||
#ifndef EKCD | |||||
return (0); | |||||
#else /* EKCD */ | |||||
uint8_t *buf, *p; | |||||
struct kerneldumpcrypto *kdc; | |||||
uint32_t encryptedkeysize; | |||||
int ret; | |||||
kdc = di->kdc; | |||||
if (kdc == NULL) | |||||
return (0); | |||||
buf = malloc(kdc->kdc_dumpkeysize, M_EKCD, M_WAITOK | M_ZERO); | |||||
p = buf; | |||||
*p = kdc->kdc_encryption; | |||||
p += sizeof(kdc->kdc_encryption); | |||||
bcopy(kdc->kdc_iv, p, sizeof(kdc->kdc_iv)); | |||||
p += sizeof(kdc->kdc_iv); | |||||
encryptedkeysize = htod32(kdc->kdc_encryptedkeysize); | |||||
bcopy(&encryptedkeysize, p, sizeof(encryptedkeysize)); | |||||
p += sizeof(encryptedkeysize); | |||||
bcopy(kdc->kdc_encryptedkey, p, kdc->kdc_encryptedkeysize); | |||||
p += kdc->kdc_encryptedkeysize; | |||||
cemUnsubmitted Done Inline ActionsEw. Can you just use an ordinary struct for this like kerneldumpheader does? cem: Ew. Can you just use an ordinary struct for this like `kerneldumpheader` does? | |||||
defAuthorUnsubmitted Done Inline ActionsWe'd like to write packed data and an ordinary structure can have gaps between fields. geli does similar thing in geom/eli/g_eli.h to encode metadata so it can be used by various architectures. def: We'd like to write packed data and an ordinary structure can have gaps between fields. geli… | |||||
cemUnsubmitted Done Inline Actions
So use __packed from sys/cdefs.h. cem: > We'd like to write packed data and an ordinary structure can have gaps between fields
So use… | |||||
defAuthorUnsubmitted Done Inline ActionsWouldn't it create bus errors on some architectures, e.g. SPARC [1]? It would be nice to have a way to save a crash dump generated by another architecture. https://docs.oracle.com/cd/E60778_01/html/E60745/bjaby.html#OSSCGbjacr def: Wouldn't it create bus errors on some architectures, e.g. SPARC [1]? It would be nice to have a… | |||||
cemUnsubmitted Done Inline ActionsNope. The compiler is responsible for generating appropriately sized and aligned memory operations and truncating as needed. (I don't think that particular piece of Solaris documentation is relevant, but maybe I'm missing something.) cem: Nope. The compiler is responsible for generating appropriately sized and aligned memory… | |||||
defAuthorUnsubmitted Done Inline ActionsThis is a quote from the link I sent:
As far as I understand SPARC has strict alignment and it won't be possible to save a crash dump generated by an amd64 machine on a SPARC machine because of alignment differences but correct me if I'm wrong. def: This is a quote from the link I sent:
> Note - If you use #pragma pack to align struct or union… | |||||
cemUnsubmitted Done Inline ActionsDoes Solaris' #pragma pack have *any* relationship to FreeBSD/Clang(/GCC on Sparc64)'s __packed aka __attribute__((__packed__))? I don't think it does. As far as I understand it, it's completely invalid for a compiler to produce assembly that will generate bus errors when the struct itself is aligned, even if packed. The standards don't say anything about packed, of course, so I don't have anything authoritative to point to here. Strict alignment just means the compiler generates assembly instructions to read the aligned DWORD(s) and uses binary shifts or ANDs to extract the relevant field from the aligned memory read. There is no reason packed should generate bus errors. That would be a compiler bug. cem: Does Solaris' `#pragma pack` have *any* relationship to FreeBSD/Clang(/GCC on Sparc64)'s… | |||||
defAuthorUnsubmitted Done Inline ActionsThanks for the explanation, Conrad. It's fixed now. def: Thanks for the explanation, Conrad. It's fixed now. | |||||
ret = dump_raw_write(di, buf, physical, offset, kdc->kdc_dumpkeysize); | |||||
explicit_bzero(buf, kdc->kdc_dumpkeysize); | |||||
free(buf, M_EKCD); | |||||
return (ret); | |||||
#endif /* !EKCD */ | |||||
} | |||||
void | void | ||||
mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver, | mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver, | ||||
uint64_t dumplen, uint32_t blksz) | uint64_t dumplen, uint32_t dumpkeysize, uint32_t blksz) | ||||
{ | { | ||||
bzero(kdh, sizeof(*kdh)); | bzero(kdh, sizeof(*kdh)); | ||||
Context not available. | |||||
kdh->architectureversion = htod32(archver); | kdh->architectureversion = htod32(archver); | ||||
kdh->dumplength = htod64(dumplen); | kdh->dumplength = htod64(dumplen); | ||||
kdh->dumptime = htod64(time_second); | kdh->dumptime = htod64(time_second); | ||||
kdh->dumpkeysize = htod32(dumpkeysize); | |||||
kdh->blocksize = htod32(blksz); | kdh->blocksize = htod32(blksz); | ||||
strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); | strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); | ||||
strlcpy(kdh->versionstring, version, sizeof(kdh->versionstring)); | strlcpy(kdh->versionstring, version, sizeof(kdh->versionstring)); | ||||
Context not available. | |||||
Done Inline ActionsI'm starting to wonder: is M_WAITOK a good thing here? Maybe we should change it to M_NOWAIT and print a message like "Not enough memory" or so. The documentation also says: Note that M_NOWAIT is required when running in an interrupt context. I'm not an expert, but this is interrupt context, right? oshogbo: I'm starting to wonder: is M_WAITOK a good thing here?
Our process was interrupted by panic so we… | |||||
Done Inline ActionsNo, this is not an interrupt context, really. If you add something like 'NMI interrupt context' definition, then it would be it, but the definition is non-sensical, because it is really any point in the code. Spinlocks cannot protect such context against interrupted execution, and this is the main and fatal difference with the usual interrupt context definition. That said, malloc/UMA data structures are protected by normal (AKA sleepable) mutexes and thus malloc even with M_NOWAIT flag cannot be used from an interrupt context. It can be used from the context of interrupt thread, but this is much harder requirement. In other words, malloc(9) in any form is highly undesirable in the dumping path. kib: No, this is not an interrupt context, really. If you add something like 'NMI interrupt… | |||||
Done Inline ActionsSo do you think that we should have static buffer for it or malloc this memory earlier? oshogbo: So do you think that we should have static buffer for it or malloc this memory earlier? | |||||
Done Inline ActionsThanks Mariusz and Konstantin for discussing this issue. I changed the code to allocate a buffer during a dump device setup. def: Thanks Mariusz and Konstantin for discussing this issue. I changed the code to allocate a… | |||||
Done Inline ActionsIt would be better that way, as @def proposed. Note that drivers which handle dumpdev might have a different idea of dump time, e.g. they could use busdma, which often allocates memory. Still, the less the dump relies on working kernel subsystems, the more reliable it is. My opinion is that such non-trivial things from ddb/panic context should be performed by a mechanism like kexec. kib: It would be better that way, as @def proposed. Note that drivers which handle dumpdev… |
dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize);