Changeset View
Standalone View
sys/kern/kern_dump.c
/*- | /*- | ||||
* Copyright (c) 2002 Marcel Moolenaar | * Copyright (c) 2002 Marcel Moolenaar | ||||
* Copyright (c) 2015 EMC Corporation | |||||
* All rights reserved. | * All rights reserved. | ||||
* | * | ||||
* Redistribution and use in source and binary forms, with or without | * Redistribution and use in source and binary forms, with or without | ||||
* modification, are permitted provided that the following conditions | * modification, are permitted provided that the following conditions | ||||
* are met: | * are met: | ||||
* | * | ||||
* 1. Redistributions of source code must retain the above copyright | * 1. Redistributions of source code must retain the above copyright | ||||
* notice, this list of conditions and the following disclaimer. | * notice, this list of conditions and the following disclaimer. | ||||
Show All 12 Lines | |||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | ||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
*/ | */ | ||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include "opt_ddb.h" | #include "opt_ddb.h" | ||||
#include "opt_gzio.h" | |||||
#include "opt_watchdog.h" | #include "opt_watchdog.h" | ||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/conf.h> | #include <sys/conf.h> | ||||
#include <sys/cons.h> | #include <sys/cons.h> | ||||
#include <sys/gzio.h> | |||||
#include <sys/jail.h> | #include <sys/jail.h> | ||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/kerneldump.h> | #include <sys/kerneldump.h> | ||||
#include <sys/malloc.h> | |||||
#include <sys/priv.h> | #include <sys/priv.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#ifdef SW_WATCHDOG | #ifdef SW_WATCHDOG | ||||
#include <sys/watchdog.h> | #include <sys/watchdog.h> | ||||
#endif | #endif | ||||
#include <ddb/ddb.h> | #include <ddb/ddb.h> | ||||
Show All 26 Lines | |||||
/* Context information for dump-debuggers. */ | /* Context information for dump-debuggers. */ | ||||
static struct pcb dumppcb; /* Registers. */ | static struct pcb dumppcb; /* Registers. */ | ||||
lwpid_t dumptid; /* Thread ID. */ | lwpid_t dumptid; /* Thread ID. */ | ||||
/* Dump state. */ | /* Dump state. */ | ||||
static off_t dumpoff; | static off_t dumpoff; | ||||
static char buffer[DEV_BSIZE]; | static char buffer[DEV_BSIZE]; | ||||
static size_t fragsz; | static size_t fragsz; | ||||
#ifdef GZIO | |||||
static struct gzio_stream *gzs; | |||||
static uint8_t *gzbuffer; | |||||
#endif | |||||
static char dumpdevname[sizeof(((struct cdev *)NULL)->si_name)]; | static char dumpdevname[sizeof(((struct cdev *)NULL)->si_name)]; | ||||
SYSCTL_DECL(_kern_shutdown); | SYSCTL_DECL(_kern_shutdown); | ||||
SYSCTL_STRING(_kern_shutdown, OID_AUTO, dumpdevname, CTLFLAG_RD, dumpdevname, 0, | SYSCTL_STRING(_kern_shutdown, OID_AUTO, dumpdevname, CTLFLAG_RD, dumpdevname, 0, | ||||
"Device for kernel dumps"); | "Device for kernel dumps"); | ||||
static int compress_kernel_dumps = 0; | |||||
#ifdef GZIO | |||||
static int compress_kernel_dumps_gzlevel = 6; | |||||
SYSCTL_INT(_kern, OID_AUTO, compress_kernel_dumps_gzlevel, CTLFLAG_RW, | |||||
&compress_kernel_dumps_gzlevel, 0, | |||||
"Kernel crash dump compression level"); | |||||
static int sysctl_dump_gz_toggle(SYSCTL_HANDLER_ARGS); | |||||
SYSCTL_PROC(_kern, OID_AUTO, compress_kernel_dumps, CTLFLAG_RW | CTLTYPE_INT, | |||||
&compress_kernel_dumps, 0, sysctl_dump_gz_toggle, "I", | |||||
"Enable compressed kernel crash dumps"); | |||||
static int dump_gz_configure(struct dumperinfo *); | |||||
static void dump_gz_disable(void); | |||||
static int dump_gz_write_cb(void *, size_t, off_t, void *); | |||||
cem: Whitespace looks off here, but that may just be Phabricator. | |||||
Not Done Inline ActionsYeah, it's phabricator. markj: Yeah, it's phabricator. | |||||
static int | |||||
sysctl_dump_gz_toggle(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
int error, value; | |||||
value = *(int *)arg1; | |||||
error = sysctl_handle_int(oidp, &value, 0, req); | |||||
Not Done Inline ActionsWhat's this construct for? cem: What's this construct for? | |||||
Not Done Inline Actionssysctl_handle_int? It does two things here: if the user is reading the sysctl, it returns the current value of compress_kernel_dumps. And if the user is modifying it, it copies the original value out and copies the new value into the local var "value". markj: sysctl_handle_int? It does two things here: if the user is reading the sysctl, it returns the… | |||||
Not Done Inline ActionsNo, why &value instead of just arg1? cem: No, why `&value` instead of just `arg1`? | |||||
Not Done Inline ActionsThat would write the new value to compress_kernel_cores before actually configuring compression. markj: That would write the new value to compress_kernel_cores before actually configuring compression. | |||||
if (error != 0 || req->newptr == NULL) | |||||
return (error); | |||||
if (value == 0) { | |||||
compress_kernel_dumps = 0; | |||||
dump_gz_disable(); | |||||
} else if (compress_kernel_dumps == 0) { | |||||
if (strlen(dumpdevname) > 0) | |||||
error = dump_gz_configure(&dumper); | |||||
if (error == 0) | |||||
compress_kernel_dumps = 1; | |||||
} | |||||
return (error); | |||||
} | |||||
#endif /* GZIO */ | |||||
/* | |||||
* When writing a kernel dump to disk, we also include dump metadata that is | |||||
* used by savecore(8) to locate and recover the dump. This metadata is | |||||
* represented by the struct kerneldumpheader type. When a kernel dump is | |||||
* complete, two copies of the header are written: one to the last sector of | |||||
* the dump medium, and one immediately before the beginning of the dump. The | |||||
* last header is used to locate the first header, and thus, the dump itself. | |||||
* | |||||
* When the dump is written without compression, things are simple: we know | |||||
* exactly how long the dump will be, so the initial offset in the medium can be | |||||
* chosen such that the end of the dump is flush with the terminating header. | |||||
* In this case, the "extent" of the dump (the space between the two headers) is | |||||
* equal to its length. In the compressed case we don't know the dump length | |||||
* a priori, so we write the dump starting at the same offset as we would in the | |||||
* uncompressed case. Once the dump is complete, we know its compressed length, | |||||
* so the dump headers are updated and written to the medium. In this case, the | |||||
* extent tells savecore(8) where to find the beginning of the dump, and the | |||||
* length tells it how far into the extent it must read to recover the dump. | |||||
Not Done Inline ActionsWhat about if the space is too small for an uncompressed dump? Follow-up question: Why don't we just start writing compressed dumps at the beginning of the medium? Edit: Below in dump_start(), that is exactly what we do, but only if the space would be too small. I'd prefer to simplify things a little and just start writing at the beginning always. *Shrug*. cem: What about if the space is too small for an uncompressed dump?
Follow-up question: Why don't… | |||||
Not Done Inline ActionsIf the space is too small, we return an error before attempting to write anything. If it's too small for even a compressed dump, we won't find that out until we've hit the end of the partition. That's not ideal, but I don't see a good way around that. Writing to the end of the device comes from the fact that the dump device is usually also the swap partition. When the system boots up after a panic, it'll fsck the local filesystems before it recovers the dump, and fsck could swap if it's dealing with a large filesystem and the amount of physical memory available is small. Writing the dump to the beginning of the device increases the risk that it'll be overwritten with swapped pages. I suppose there could be other processes that cause this, but fsck is the main example I think. So, this is just a small robustness measure. Obviously it's not foolproof, but everything involved in kernel dumps is best-effort. markj: If the space is too small, we return an error before attempting to write anything. If it's too… | |||||
*/ | |||||
int | int | ||||
doadump(boolean_t textdump) | doadump(boolean_t textdump) | ||||
{ | { | ||||
boolean_t coredump; | boolean_t coredump; | ||||
int error; | int error; | ||||
error = 0; | error = 0; | ||||
if (dumping) | if (dumping) | ||||
Show All 21 Lines | |||||
/* Perform any needed initialization in preparation for a kernel dump. */ | /* Perform any needed initialization in preparation for a kernel dump. */ | ||||
int | int | ||||
dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh) | dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh) | ||||
{ | { | ||||
uint64_t length; | uint64_t length; | ||||
length = dtoh64(kdh->dumplength); | length = dtoh64(kdh->dumplength); | ||||
if (di->mediasize < SIZEOF_METADATA + length + sizeof(*kdh) * 2) | if (di->mediasize < SIZEOF_METADATA + length + 2 * sizeof(*kdh)) { | ||||
return (E2BIG); | if (compress_kernel_dumps) | ||||
/* | |||||
* We don't yet know how much space the compressed dump | |||||
* will occupy, so try to use the whole swap partition | |||||
* (minus the first 64KB). If that doesn't turn out to | |||||
* be enough, the bounds checking in dump_write_raw() | |||||
* will catch us. | |||||
*/ | |||||
length = di->mediasize - SIZEOF_METADATA - | |||||
2 * sizeof(*kdh); | |||||
else | |||||
return (ENOSPC); | |||||
} | |||||
/* | |||||
* The initial offset at which we're going to write the dump (excluding | |||||
* the leading kernel dump header). | |||||
*/ | |||||
dumpoff = di->mediaoffset + di->mediasize - length - sizeof(*kdh); | dumpoff = di->mediaoffset + di->mediasize - length - sizeof(*kdh); | ||||
kdh->dumpextent = htod64(length); | |||||
return (0); | return (0); | ||||
} | } | ||||
/* Complete a kernel dump. */ | /* Complete a kernel dump. */ | ||||
int | int | ||||
dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh) | dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh) | ||||
{ | { | ||||
uint64_t length; | uint64_t extent; | ||||
int error; | int error; | ||||
dumpoff = 0; | extent = dtoh64(kdh->dumpextent); | ||||
length = dtoh64(kdh->dumplength); | #ifdef GZIO | ||||
if (compress_kernel_dumps) { | |||||
error = gzio_flush(gzs); | |||||
if (error != 0) | |||||
return (error); | |||||
/* | |||||
* Now that we've completed the compressed dump, we know its | |||||
* size, so update the header accordingly and recompute parity. | |||||
*/ | |||||
kdh->dumplength = htod64(dumpoff - | |||||
(di->mediaoffset + di->mediasize - extent - sizeof(*kdh))); | |||||
kdh->parity = 0; | |||||
kdh->parity = kerneldump_parity(kdh); | |||||
} | |||||
#endif | |||||
/* Write dump headers at the beginning and end of the dump extent. */ | /* Write dump headers at the beginning and end of the dump extent. */ | ||||
error = dump_write_raw(di, kdh, 0, | error = dump_write_raw(di, kdh, 0, | ||||
di->mediaoffset + di->mediasize - sizeof(*kdh), sizeof(*kdh)); | di->mediaoffset + di->mediasize - sizeof(*kdh), sizeof(*kdh)); | ||||
if (error != 0) | if (error != 0) | ||||
return (error); | return (error); | ||||
error = dump_write_raw(di, kdh, 0, | error = dump_write_raw(di, kdh, 0, | ||||
di->mediaoffset + di->mediasize - length - 2 * sizeof(*kdh), | di->mediaoffset + di->mediasize - extent - 2 * sizeof(*kdh), | ||||
sizeof(*kdh)); | sizeof(*kdh)); | ||||
if (error != 0) | if (error != 0) | ||||
return (error); | return (error); | ||||
/* Reset dump state. */ | |||||
#ifdef GZIO | |||||
if (compress_kernel_dumps) | |||||
gzio_reset(gzs); | |||||
#endif | |||||
dumpoff = 0; | |||||
/* Tell the dump media driver that we're done. */ | /* Tell the dump media driver that we're done. */ | ||||
return (dump_write_raw(di, NULL, 0, 0, 0)); | return (dump_write_raw(di, NULL, 0, 0, 0)); | ||||
} | } | ||||
/* Write starting at the current kernel dump offset. */ | /* Write starting at the current kernel dump offset. */ | ||||
int | int | ||||
dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, | dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, | ||||
size_t length) | size_t length) | ||||
{ | { | ||||
int error; | int error; | ||||
#ifdef GZIO | |||||
if (compress_kernel_dumps) { | |||||
/* Bounce through a buffer to avoid gzip CRC errors. */ | |||||
memmove(gzbuffer, virtual, length); | |||||
return (gzio_write(gzs, gzbuffer, length)); | |||||
} | |||||
#endif | |||||
error = dump_write_raw(di, virtual, physical, dumpoff, length); | error = dump_write_raw(di, virtual, physical, dumpoff, length); | ||||
if (error == 0) | if (error == 0) | ||||
dumpoff += length; | dumpoff += length; | ||||
return (error); | return (error); | ||||
} | } | ||||
/* Seek forward by the specified number of bytes. */ | /* Seek forward by the specified number of bytes. */ | ||||
int | int | ||||
dump_skip(struct dumperinfo *di, size_t gap) | dump_skip(struct dumperinfo *di, size_t gap) | ||||
{ | { | ||||
if (gap > di->maxiosize) | |||||
return (EINVAL); | |||||
Not Done Inline ActionsI'd probably use a different errno. EINVAL? ENXIO suggests the device disappeared. cem: I'd probably use a different errno. EINVAL? ENXIO suggests the device disappeared. | |||||
Not Done Inline ActionsThanks, EINVAL makes more sense. markj: Thanks, EINVAL makes more sense. | |||||
#ifdef GZIO | |||||
if (compress_kernel_dumps) { | |||||
memset(gzbuffer, 0, di->maxiosize); | |||||
return (gzio_write(gzs, gzbuffer, gap)); | |||||
} | |||||
#endif | |||||
dumpoff += gap; | dumpoff += gap; | ||||
return (0); | return (0); | ||||
} | } | ||||
/* Call dumper with bounds checking. */ | /* Call dumper with bounds checking. */ | ||||
int | int | ||||
dump_write_raw(struct dumperinfo *di, void *virtual, vm_offset_t physical, | dump_write_raw(struct dumperinfo *di, void *virtual, vm_offset_t physical, | ||||
off_t offset, size_t length) | off_t offset, size_t length) | ||||
{ | { | ||||
if (length != 0 && (offset < di->mediaoffset || | if (length != 0 && (offset < di->mediaoffset || | ||||
offset - di->mediaoffset + length > di->mediasize)) { | offset - di->mediaoffset + length > di->mediasize)) { | ||||
printf("Attempt to write outside dump device boundaries.\n" | printf("Attempt to write outside dump device boundaries.\n" | ||||
"offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", | "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", | ||||
(intmax_t)offset, (intmax_t)di->mediaoffset, | (intmax_t)offset, (intmax_t)di->mediaoffset, | ||||
(uintmax_t)length, (intmax_t)di->mediasize); | (uintmax_t)length, (intmax_t)di->mediasize); | ||||
return (ENOSPC); | return (ENOSPC); | ||||
} | } | ||||
return (di->dumper(di->priv, virtual, physical, offset, length)); | return (di->dumper(di->priv, virtual, physical, offset, length)); | ||||
} | } | ||||
#ifdef GZIO | |||||
static int | |||||
dump_gz_configure(struct dumperinfo *di) | |||||
{ | |||||
MPASS(gzs == NULL); | |||||
gzs = gzio_init(dump_gz_write_cb, GZIO_DEFLATE, di->maxiosize, | |||||
compress_kernel_dumps_gzlevel, di); | |||||
if (gzs == NULL) | |||||
return (EINVAL); | |||||
gzbuffer = malloc(di->maxiosize, M_TEMP, M_WAITOK | M_NODUMP); | |||||
return (0); | |||||
} | |||||
static void | |||||
dump_gz_disable(void) | |||||
{ | |||||
if (gzs != NULL) { | |||||
gzio_fini(gzs); | |||||
gzs = NULL; | |||||
} | |||||
free(gzbuffer, M_TEMP); | |||||
gzbuffer = NULL; | |||||
} | |||||
/* Write compressed data to the dump medium. */ | |||||
static int | |||||
dump_gz_write_cb(void *base, size_t length, off_t offset __unused, void *arg) | |||||
{ | |||||
struct dumperinfo *di; | |||||
int error; | |||||
di = (struct dumperinfo *)arg; | |||||
if (dumpoff % di->blocksize != 0) | |||||
/* | |||||
* A previous write caused us to end up at an unaligned offset. | |||||
Not Done Inline ActionsIs it valid to send unaligned blocks to dump_write_raw? But the interface requires block-unit sizes? Seems inconsistent. cem: Is it valid to send unaligned blocks to dump_write_raw? But the interface requires block-unit… | |||||
Not Done Inline ActionsYeah, this is somewhat weird. There's some reasoning behind it: dump_write_raw() wants blocks that are multiples of di->blocksize in size. The gzio buffer's size is di->maxiosize, which must be a multiple of di->blocksize. So dump_gz_write_cb will always be invoked with length % di->blocksize == 0 except once, when the stream is flushed at the end of the dump (i.e. in our last write). Hence the roundup(). But, we want dumpoff to contain the true length, since it'll be used to fill in the dumplength field in the header later. Otherwise savecore will read some extra garbage beyond the end of the dump, and gzip will complain when it encounters that. This at least deserves a comment. markj: Yeah, this is somewhat weird. There's some reasoning behind it: dump_write_raw() wants blocks… | |||||
* Only the final gzio flush should cause that, in which case we | |||||
* shouldn't be here. | |||||
*/ | |||||
return (EINVAL); | |||||
error = dump_write_raw(di, base, 0, dumpoff, | |||||
roundup(length, di->blocksize)); | |||||
if (error == 0) | |||||
dumpoff += length; | |||||
return (error); | |||||
} | |||||
#endif /* GZIO */ | |||||
/* Register a dumper. */ | /* Register a dumper. */ | ||||
int | int | ||||
set_dumper(struct dumperinfo *di, const char *devname, struct thread *td) | set_dumper(struct dumperinfo *di, const char *devname, struct thread *td) | ||||
{ | { | ||||
size_t wantcopy; | size_t wantcopy; | ||||
int error; | int error; | ||||
error = priv_check(td, PRIV_SETDUMPER); | error = priv_check(td, PRIV_SETDUMPER); | ||||
if (error != 0) | if (error != 0) | ||||
return (error); | return (error); | ||||
if (di == NULL) { | if (di == NULL) { | ||||
bzero(&dumper, sizeof dumper); | bzero(&dumper, sizeof dumper); | ||||
dumpdevname[0] = '\0'; | dumpdevname[0] = '\0'; | ||||
#ifdef GZIO | |||||
if (compress_kernel_dumps) | |||||
dump_gz_disable(); | |||||
#endif | |||||
return (0); | return (0); | ||||
} | } | ||||
if (dumper.dumper != NULL) | if (dumper.dumper != NULL) | ||||
return (EBUSY); | return (EBUSY); | ||||
dumper = *di; | dumper = *di; | ||||
wantcopy = strlcpy(dumpdevname, devname, sizeof(dumpdevname)); | wantcopy = strlcpy(dumpdevname, devname, sizeof(dumpdevname)); | ||||
if (wantcopy >= sizeof(dumpdevname)) | if (wantcopy >= sizeof(dumpdevname)) | ||||
printf("set_dumper: device name truncated from '%s' -> '%s'\n", | printf("set_dumper: device name truncated from '%s' -> '%s'\n", | ||||
devname, dumpdevname); | devname, dumpdevname); | ||||
return (0); | #ifdef GZIO | ||||
if (compress_kernel_dumps) | |||||
error = dump_gz_configure(di); | |||||
#endif | |||||
return (error); | |||||
} | } | ||||
Not Done Inline ActionsWon't this panic if we switch dumpers without disabling first (MPASS(gzs == NULL) in dump_gz_configure)? cem: Won't this panic if we switch dumpers without disabling first (`MPASS(gzs == NULL)` in… | |||||
Not Done Inline ActionsNo: you can't switch dumpers without unsetting the current dumper first, which frees gzs. See the check above that returns EBUSY. markj: No: you can't switch dumpers without unsetting the current dumper first, which frees gzs. See… | |||||
void | void | ||||
mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver, | mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver, | ||||
uint64_t dumplen, uint32_t blksz) | uint64_t dumplen, uint32_t blksz) | ||||
{ | { | ||||
bzero(kdh, sizeof(*kdh)); | bzero(kdh, sizeof(*kdh)); | ||||
strlcpy(kdh->magic, magic, sizeof(kdh->magic)); | strlcpy(kdh->magic, magic, sizeof(kdh->magic)); | ||||
strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); | strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); | ||||
if (compress_kernel_dumps && strcmp(magic, KERNELDUMPMAGIC) == 0) | |||||
strlcpy(kdh->magic, GZDUMPMAGIC, sizeof(kdh->magic)); | |||||
else | |||||
strlcpy(kdh->magic, magic, sizeof(kdh->magic)); | |||||
kdh->version = htod32(KERNELDUMPVERSION); | kdh->version = htod32(KERNELDUMPVERSION); | ||||
kdh->architectureversion = htod32(archver); | kdh->architectureversion = htod32(archver); | ||||
kdh->dumplength = htod64(dumplen); | kdh->dumplength = htod64(dumplen); | ||||
kdh->dumpextent = kdh->dumplength; | |||||
kdh->dumptime = htod64(time_second); | kdh->dumptime = htod64(time_second); | ||||
kdh->blocksize = htod32(blksz); | kdh->blocksize = htod32(blksz); | ||||
strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); | strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); | ||||
strlcpy(kdh->versionstring, version, sizeof(kdh->versionstring)); | strlcpy(kdh->versionstring, version, sizeof(kdh->versionstring)); | ||||
if (panicstr != NULL) | if (panicstr != NULL) | ||||
strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); | strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); | ||||
kdh->parity = kerneldump_parity(kdh); | kdh->parity = kerneldump_parity(kdh); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 312 Lines • Show Last 20 Lines |
Whitespace looks off here, but that may just be Phabricator.