Changeset View
Changeset View
Standalone View
Standalone View
sys/kern/kern_dump.c
Show All 25 Lines | |||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/conf.h> | #include <sys/conf.h> | ||||
#include <sys/cons.h> | #include <sys/cons.h> | ||||
#include <sys/kdb.h> | |||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/kerneldump.h> | #include <sys/kerneldump.h> | ||||
#include <sys/malloc.h> | |||||
#include <sys/msgbuf.h> | #include <sys/msgbuf.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/watchdog.h> | #include <sys/watchdog.h> | ||||
#include <vm/vm.h> | #include <vm/vm.h> | ||||
#include <vm/vm_param.h> | #include <vm/vm_param.h> | ||||
#include <vm/vm_page.h> | #include <vm/vm_page.h> | ||||
#include <vm/vm_phys.h> | #include <vm/vm_phys.h> | ||||
▲ Show 20 Lines • Show All 246 Lines • ▼ Show 20 Lines | dumpsys_generic(struct dumperinfo *di) | ||||
Elf_Ehdr ehdr; | Elf_Ehdr ehdr; | ||||
uint64_t dumpsize; | uint64_t dumpsize; | ||||
off_t hdrgap; | off_t hdrgap; | ||||
size_t hdrsz; | size_t hdrsz; | ||||
int error; | int error; | ||||
#if MINIDUMP_PAGE_TRACKING == 1 | #if MINIDUMP_PAGE_TRACKING == 1 | ||||
if (do_minidump) | if (do_minidump) | ||||
return (minidumpsys(di)); | return (minidumpsys(di, false)); | ||||
#endif | #endif | ||||
bzero(&ehdr, sizeof(ehdr)); | bzero(&ehdr, sizeof(ehdr)); | ||||
ehdr.e_ident[EI_MAG0] = ELFMAG0; | ehdr.e_ident[EI_MAG0] = ELFMAG0; | ||||
ehdr.e_ident[EI_MAG1] = ELFMAG1; | ehdr.e_ident[EI_MAG1] = ELFMAG1; | ||||
ehdr.e_ident[EI_MAG2] = ELFMAG2; | ehdr.e_ident[EI_MAG2] = ELFMAG2; | ||||
ehdr.e_ident[EI_MAG3] = ELFMAG3; | ehdr.e_ident[EI_MAG3] = ELFMAG3; | ||||
ehdr.e_ident[EI_CLASS] = ELF_CLASS; | ehdr.e_ident[EI_CLASS] = ELF_CLASS; | ||||
▲ Show 20 Lines • Show All 149 Lines • ▼ Show 20 Lines | if (!progress_track[i].visited) { | ||||
progress_track[i].visited = true; | progress_track[i].visited = true; | ||||
printf("..%d%%", sofar); | printf("..%d%%", sofar); | ||||
} | } | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
int | int | ||||
minidumpsys(struct dumperinfo *di) | minidumpsys(struct dumperinfo *di, bool livedump) | ||||
{ | { | ||||
struct minidumpstate state; | struct minidumpstate state; | ||||
struct msgbuf mb_copy; | |||||
char *msg_ptr; | |||||
size_t sz; | |||||
int error; | int error; | ||||
if (livedump) { | |||||
KASSERT(!dumping, ("live dump invoked from incorrect context")); | |||||
/* | |||||
* Before invoking cpu_minidumpsys() on the live system, we | |||||
* must snapshot some required global state: the message | |||||
markj: The bitset may change even during copying.
I would suggest explaining a bit further why this is ok and in particular try to characterize the inconsistencies that can result. | |||||
* buffer, and the page dump bitset. They may be modified at | |||||
* any moment, so for the sake of the live dump it is best to | |||||
* have an unchanging snapshot to work with. Both are included | |||||
* as part of the dump and consumed by userspace tools. | |||||
* | |||||
* Other global state important to the minidump code is the | |||||
* dump_avail array and the kernel's page tables, but snapshots | |||||
* are not taken of these. For one, dump_avail[] is expected | |||||
* not to change after boot. Snapshotting the kernel page | |||||
* tables would involve an additional walk, so this is avoided | |||||
Not Done Inline Actions — markj: What about a kernel that is configured to dump core and reboot automatically upon a panic? Is kdb_active == 1 in that case? I think not. | |||||
Done Inline Actions — mhorne: Right, I think what we want is `kdb_active || KERNEL_PANICKED()`. | |||||
Not Done Inline Actions — markj: IMO it would be reasonable to also/instead assert `dumping` (and assert `!dumping` in the live case). | |||||
* too. | |||||
* | |||||
* This means live dumps are best effort, and the result may or | |||||
* may not be usable; there are no guarantees about the | |||||
* consistency of the dump's contents. Any of the following | |||||
* (and likely more) may affect the live dump: | |||||
* | |||||
* - Data may be modified, freed, or remapped during the | |||||
* course of the dump, such that the contents written out | |||||
* are partially or entirely unrecognizable. This means | |||||
* valid references may point to destroyed/mangled objects, | |||||
* and vice versa. | |||||
* | |||||
* - The dumped context of any threads that ran during the | |||||
* dump process may be unreliable. | |||||
* | |||||
* - The set of kernel page tables included in the dump likely | |||||
* won't correspond exactly to the copy of the dump bitset. | |||||
* This means some pages will be dumped without any way to | |||||
* locate them, and some pages may not have been dumped | |||||
* despite appearing as if they should. | |||||
*/ | |||||
msg_ptr = malloc(msgbufsize, M_TEMP, M_WAITOK); | |||||
msgbuf_duplicate(msgbufp, &mb_copy, msg_ptr); | |||||
state.msgbufp = &mb_copy; | |||||
sz = BITSET_SIZE(vm_page_dump_pages); | |||||
state.dump_bitset = malloc(sz, M_TEMP, M_WAITOK); | |||||
BIT_COPY_STORE_REL(sz, vm_page_dump, state.dump_bitset); | |||||
} else { | |||||
KASSERT(dumping, ("minidump invoked outside of doadump()")); | |||||
/* Use the globals. */ | |||||
state.msgbufp = msgbufp; | state.msgbufp = msgbufp; | ||||
state.dump_bitset = vm_page_dump; | state.dump_bitset = vm_page_dump; | ||||
} | |||||
error = cpu_minidumpsys(di, &state); | error = cpu_minidumpsys(di, &state); | ||||
if (livedump) { | |||||
free(msg_ptr, M_TEMP); | |||||
free(state.dump_bitset, M_TEMP); | |||||
} | |||||
return (error); | return (error); | ||||
} | } | ||||
#endif /* MINIDUMP_PAGE_TRACKING == 1 */ | #endif /* MINIDUMP_PAGE_TRACKING == 1 */ |
markj: The bitset may change even during copying.
I would suggest explaining a bit further why this is OK and, in particular, trying to characterize the inconsistencies that can result.