Index: head/sbin/savecore/savecore.c =================================================================== --- head/sbin/savecore/savecore.c (revision 298075) +++ head/sbin/savecore/savecore.c (revision 298076) @@ -1,861 +1,878 @@ /*- * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Copyright (c) 1986, 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* The size of the buffer used for I/O. */ #define BUFFERSIZE (1024*1024) #define STATUS_BAD 0 #define STATUS_GOOD 1 #define STATUS_UNKNOWN 2 static int checkfor, compress, clear, force, keep, verbose; /* flags */ static int nfound, nsaved, nerr; /* statistics */ static int maxdumps; extern FILE *zopen(const char *, const char *); static sig_atomic_t got_siginfo; static void infohandler(int); static void printheader(xo_handle_t *xo, const struct kerneldumpheader *h, const char *device, int bounds, const int status) { uint64_t dumplen; time_t t; const char *stat_str; xo_flush_h(xo); xo_emit_h(xo, "{Lwc:Dump header from device}{:dump_device/%s}\n", device); xo_emit_h(xo, "{P: }{Lwc:Architecture}{:architecture/%s}\n", h->architecture); xo_emit_h(xo, "{P: }{Lwc:Architecture Version}{:architecture_version/%u}\n", dtoh32(h->architectureversion)); dumplen = dtoh64(h->dumplength); xo_emit_h(xo, "{P: }{Lwc:Dump Length}{:dump_length_bytes/%lld}\n", (long long)dumplen); xo_emit_h(xo, "{P: }{Lwc:Blocksize}{:blocksize/%d}\n", dtoh32(h->blocksize)); t = dtoh64(h->dumptime); xo_emit_h(xo, "{P: }{Lwc:Dumptime}{:dumptime/%s}", ctime(&t)); xo_emit_h(xo, "{P: }{Lwc:Hostname}{:hostname/%s}\n", h->hostname); xo_emit_h(xo, "{P: }{Lwc:Magic}{:magic/%s}\n", h->magic); xo_emit_h(xo, "{P: }{Lwc:Version String}{:version_string/%s}", h->versionstring); xo_emit_h(xo, "{P: }{Lwc:Panic String}{:panic_string/%s}\n", h->panicstring); xo_emit_h(xo, "{P: }{Lwc:Dump Parity}{:dump_parity/%u}\n", h->parity); xo_emit_h(xo, "{P: }{Lwc:Bounds}{:bounds/%d}\n", bounds); switch(status) { case STATUS_BAD: stat_str = "bad"; break; case STATUS_GOOD: stat_str = "good"; break; default: stat_str = "unknown"; } xo_emit_h(xo, "{P: }{Lwc:Dump Status}{:dump_status/%s}\n", stat_str); xo_flush_h(xo); } static int getbounds(void) { FILE *fp; char buf[6]; int ret; ret = 0; if ((fp = fopen("bounds", "r")) == NULL) { if (verbose) printf("unable to open bounds file, using 0\n"); return (ret); } if (fgets(buf, sizeof buf, fp) == NULL) { if (feof(fp)) syslog(LOG_WARNING, "bounds file is empty, using 0"); else syslog(LOG_WARNING, "bounds file: %s", strerror(errno)); fclose(fp); return (ret); } errno = 0; ret = (int)strtol(buf, NULL, 10); if (ret == 0 && (errno == EINVAL || errno == ERANGE)) syslog(LOG_WARNING, "invalid value found in bounds, using 0"); fclose(fp); return (ret); } static void writebounds(int bounds) { FILE *fp; if ((fp = fopen("bounds", "w")) == NULL) { syslog(LOG_WARNING, "unable to write to bounds file: %m"); return; } if (verbose) printf("bounds number: %d\n", bounds); fprintf(fp, "%d\n", bounds); fclose(fp); } static off_t file_size(const char *path) { struct stat sb; /* Ignore all errors, those file may not exists. */ if (stat(path, &sb) == -1) return (0); return (sb.st_size); } static off_t saved_dump_size(int bounds) { static char path[PATH_MAX]; off_t dumpsize; dumpsize = 0; (void)snprintf(path, sizeof(path), "info.%d", bounds); dumpsize += file_size(path); (void)snprintf(path, sizeof(path), "vmcore.%d", bounds); dumpsize += file_size(path); (void)snprintf(path, sizeof(path), "vmcore.%d.gz", bounds); dumpsize += file_size(path); (void)snprintf(path, sizeof(path), "textdump.tar.%d", bounds); dumpsize += file_size(path); (void)snprintf(path, sizeof(path), "textdump.tar.%d.gz", bounds); dumpsize += file_size(path); return (dumpsize); } static void saved_dump_remove(int bounds) { static char path[PATH_MAX]; (void)snprintf(path, sizeof(path), "info.%d", bounds); (void)unlink(path); (void)snprintf(path, sizeof(path), "vmcore.%d", bounds); (void)unlink(path); (void)snprintf(path, sizeof(path), "vmcore.%d.gz", bounds); (void)unlink(path); (void)snprintf(path, sizeof(path), "textdump.tar.%d", bounds); (void)unlink(path); (void)snprintf(path, sizeof(path), "textdump.tar.%d.gz", bounds); (void)unlink(path); } static void symlinks_remove(void) { (void)unlink("info.last"); (void)unlink("vmcore.last"); (void)unlink("vmcore.last.gz"); (void)unlink("textdump.tar.last"); (void)unlink("textdump.tar.last.gz"); } /* * Check that sufficient space is available on the disk that holds the * save directory. */ static int check_space(const char *savedir, off_t dumpsize, int bounds) { FILE *fp; off_t minfree, spacefree, totfree, needed; struct statfs fsbuf; char buf[100]; if (statfs(".", &fsbuf) < 0) { syslog(LOG_ERR, "%s: %m", savedir); exit(1); } spacefree = ((off_t) fsbuf.f_bavail * fsbuf.f_bsize) / 1024; totfree = ((off_t) fsbuf.f_bfree * fsbuf.f_bsize) / 1024; if ((fp = fopen("minfree", "r")) == NULL) minfree = 0; else { if (fgets(buf, sizeof(buf), fp) == NULL) minfree = 0; else minfree = atoi(buf); (void)fclose(fp); } needed = dumpsize / 1024 + 2; /* 2 for info file */ needed -= saved_dump_size(bounds); if ((minfree > 0 ? spacefree : totfree) - needed < minfree) { syslog(LOG_WARNING, "no dump, not enough free space on device (%lld available, need %lld)", (long long)(minfree > 0 ? spacefree : totfree), (long long)needed); return (0); } if (spacefree - needed < 0) syslog(LOG_WARNING, "dump performed, but free space threshold crossed"); return (1); } #define BLOCKSIZE (1<<12) #define BLOCKMASK (~(BLOCKSIZE-1)) static int DoRegularFile(int fd, off_t dumpsize, char *buf, const char *device, const char *filename, FILE *fp) { int he, hs, nr, nw, wl; off_t dmpcnt, origsize; dmpcnt = 0; origsize = dumpsize; he = 0; while (dumpsize > 0) { wl = BUFFERSIZE; if (wl > dumpsize) wl = dumpsize; nr = read(fd, buf, wl); if (nr != wl) { if (nr == 0) syslog(LOG_WARNING, "WARNING: EOF on dump device"); else syslog(LOG_ERR, "read error on %s: %m", device); nerr++; return (-1); } if (compress) { nw = fwrite(buf, 1, wl, fp); } else { for (nw = 0; nw < nr; nw = he) { /* find a contiguous block of zeroes */ for (hs = nw; hs < nr; hs += BLOCKSIZE) { for (he = hs; he < nr && buf[he] == 0; ++he) /* nothing */ ; /* is the hole long enough to matter? */ if (he >= hs + BLOCKSIZE) break; } /* back down to a block boundary */ he &= BLOCKMASK; /* * 1) Don't go beyond the end of the buffer. * 2) If the end of the buffer is less than * BLOCKSIZE bytes away, we're at the end * of the file, so just grab what's left. */ if (hs + BLOCKSIZE > nr) hs = he = nr; /* * At this point, we have a partial ordering: * nw <= hs <= he <= nr * If hs > nw, buf[nw..hs] contains non-zero data. * If he > hs, buf[hs..he] is all zeroes. */ if (hs > nw) if (fwrite(buf + nw, hs - nw, 1, fp) != 1) break; if (he > hs) if (fseeko(fp, he - hs, SEEK_CUR) == -1) break; } } if (nw != wl) { syslog(LOG_ERR, "write error on %s file: %m", filename); syslog(LOG_WARNING, "WARNING: vmcore may be incomplete"); nerr++; return (-1); } if (verbose) { dmpcnt += wl; printf("%llu\r", (unsigned long long)dmpcnt); fflush(stdout); } dumpsize -= wl; if (got_siginfo) { printf("%s %.1lf%%\n", filename, (100.0 - (100.0 * (double)dumpsize / (double)origsize))); got_siginfo = 0; } } return (0); } /* * Specialized version of dump-reading logic for use with textdumps, which * are written backwards from the end of the partition, and must be reversed * before being written to the file. Textdumps are small, so do a bit less * work to optimize/sparsify. */ static int DoTextdumpFile(int fd, off_t dumpsize, off_t lasthd, char *buf, const char *device, const char *filename, FILE *fp) { int nr, nw, wl; off_t dmpcnt, totsize; totsize = dumpsize; dmpcnt = 0; wl = 512; if ((dumpsize % wl) != 0) { syslog(LOG_ERR, "textdump uneven multiple of 512 on %s", device); nerr++; return (-1); } while (dumpsize > 0) { nr = pread(fd, buf, wl, lasthd - (totsize - dumpsize) - wl); if (nr != wl) { if (nr == 0) syslog(LOG_WARNING, "WARNING: EOF on dump device"); else syslog(LOG_ERR, "read error on %s: %m", device); nerr++; return (-1); } nw = fwrite(buf, 1, wl, fp); if (nw != wl) { syslog(LOG_ERR, "write error on %s file: %m", filename); syslog(LOG_WARNING, "WARNING: textdump may be incomplete"); nerr++; return (-1); } if (verbose) { dmpcnt += wl; printf("%llu\r", (unsigned long long)dmpcnt); fflush(stdout); } dumpsize -= wl; } return (0); } static void DoFile(const char *savedir, const char *device) { xo_handle_t *xostdout, *xoinfo; static char infoname[PATH_MAX], corename[PATH_MAX], linkname[PATH_MAX]; - static char *buf = NULL; + static char *buf = NULL, *temp = NULL; struct kerneldumpheader kdhf, kdhl; off_t mediasize, dumpsize, firsthd, lasthd; FILE *info, *fp; mode_t oumask; int fd, fdinfo, error; int bounds, status; u_int sectorsize, xostyle; int istextdump; bounds = getbounds(); mediasize = 0; status = STATUS_UNKNOWN; xostdout = xo_create_to_file(stdout, XO_STYLE_TEXT, 0); if (xostdout == NULL) { syslog(LOG_ERR, "%s: %m", infoname); return; } if (maxdumps > 0 && bounds == maxdumps) bounds = 0; if (buf == NULL) { buf = malloc(BUFFERSIZE); if (buf == NULL) { syslog(LOG_ERR, "%m"); return; } } if (verbose) printf("checking for kernel dump on device %s\n", device); fd = open(device, (checkfor || keep) ? O_RDONLY : O_RDWR); if (fd < 0) { syslog(LOG_ERR, "%s: %m", device); return; } error = ioctl(fd, DIOCGMEDIASIZE, &mediasize); if (!error) error = ioctl(fd, DIOCGSECTORSIZE, §orsize); if (error) { syslog(LOG_ERR, "couldn't find media and/or sector size of %s: %m", device); goto closefd; } if (verbose) { printf("mediasize = %lld\n", (long long)mediasize); printf("sectorsize = %u\n", sectorsize); } + if (sectorsize < sizeof(kdhl)) { + syslog(LOG_ERR, + "Sector size is less the kernel dump header %zu", + sizeof(kdhl)); + goto closefd; + } + lasthd = mediasize - sectorsize; + if (temp == NULL) { + temp = malloc(sectorsize); + if (temp == NULL) { + syslog(LOG_ERR, "%m"); + return; + } + } if (lseek(fd, lasthd, SEEK_SET) != lasthd || - read(fd, &kdhl, sizeof(kdhl)) != sizeof(kdhl)) { + read(fd, temp, sectorsize) != sectorsize) { syslog(LOG_ERR, "error reading last dump header at offset %lld in %s: %m", (long long)lasthd, device); goto closefd; } + memcpy(&kdhl, temp, sizeof(kdhl)); istextdump = 0; if (strncmp(kdhl.magic, TEXTDUMPMAGIC, sizeof kdhl) == 0) { if (verbose) printf("textdump magic on last dump header on %s\n", device); istextdump = 1; if (dtoh32(kdhl.version) != KERNELDUMP_TEXT_VERSION) { syslog(LOG_ERR, "unknown version (%d) in last dump header on %s", dtoh32(kdhl.version), device); status = STATUS_BAD; if (force == 0) goto closefd; } } else if (memcmp(kdhl.magic, KERNELDUMPMAGIC, sizeof kdhl.magic) == 0) { if (dtoh32(kdhl.version) != KERNELDUMPVERSION) { syslog(LOG_ERR, "unknown version (%d) in last dump header on %s", dtoh32(kdhl.version), device); status = STATUS_BAD; if (force == 0) goto closefd; } } else { if (verbose) printf("magic mismatch on last dump header on %s\n", device); status = STATUS_BAD; if (force == 0) goto closefd; if (memcmp(kdhl.magic, KERNELDUMPMAGIC_CLEARED, sizeof kdhl.magic) == 0) { if (verbose) printf("forcing magic on %s\n", device); memcpy(kdhl.magic, KERNELDUMPMAGIC, sizeof kdhl.magic); } else { syslog(LOG_ERR, "unable to force dump - bad magic"); goto closefd; } if (dtoh32(kdhl.version) != KERNELDUMPVERSION) { syslog(LOG_ERR, "unknown version (%d) in last dump header on %s", dtoh32(kdhl.version), device); status = STATUS_BAD; if (force == 0) goto closefd; } } nfound++; if (clear) goto nuke; if (kerneldump_parity(&kdhl)) { syslog(LOG_ERR, "parity error on last dump header on %s", device); nerr++; status = STATUS_BAD; if (force == 0) goto closefd; } dumpsize = dtoh64(kdhl.dumplength); - firsthd = lasthd - dumpsize - sizeof kdhf; + firsthd = lasthd - dumpsize - sectorsize; if (lseek(fd, firsthd, SEEK_SET) != firsthd || - read(fd, &kdhf, sizeof(kdhf)) != sizeof(kdhf)) { + read(fd, temp, sectorsize) != sectorsize) { syslog(LOG_ERR, "error reading first dump header at offset %lld in %s: %m", (long long)firsthd, device); nerr++; goto closefd; } + memcpy(&kdhf, temp, sizeof(kdhf)); if (verbose >= 2) { printf("First dump headers:\n"); printheader(xostdout, &kdhf, device, bounds, -1); printf("\nLast dump headers:\n"); printheader(xostdout, &kdhl, device, bounds, -1); printf("\n"); } - if (memcmp(&kdhl, &kdhf, sizeof kdhl)) { + if (memcmp(&kdhl, &kdhf, sizeof(kdhl))) { syslog(LOG_ERR, "first and last dump headers disagree on %s", device); nerr++; status = STATUS_BAD; if (force == 0) goto closefd; } else { status = STATUS_GOOD; } if (checkfor) { printf("A dump exists on %s\n", device); close(fd); exit(0); } - if (kdhl.panicstring[0]) + if (kdhl.panicstring[0] != '\0') syslog(LOG_ALERT, "reboot after panic: %*s", (int)sizeof(kdhl.panicstring), kdhl.panicstring); else syslog(LOG_ALERT, "reboot"); if (verbose) printf("Checking for available free space\n"); if (!check_space(savedir, dumpsize, bounds)) { nerr++; goto closefd; } writebounds(bounds + 1); saved_dump_remove(bounds); snprintf(infoname, sizeof(infoname), "info.%d", bounds); /* * Create or overwrite any existing dump header files. */ fdinfo = open(infoname, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fdinfo < 0) { syslog(LOG_ERR, "%s: %m", infoname); nerr++; goto closefd; } oumask = umask(S_IRWXG|S_IRWXO); /* Restrict access to the core file.*/ if (compress) { snprintf(corename, sizeof(corename), "%s.%d.gz", istextdump ? "textdump.tar" : "vmcore", bounds); fp = zopen(corename, "w"); } else { snprintf(corename, sizeof(corename), "%s.%d", istextdump ? "textdump.tar" : "vmcore", bounds); fp = fopen(corename, "w"); } if (fp == NULL) { syslog(LOG_ERR, "%s: %m", corename); close(fdinfo); nerr++; goto closefd; } (void)umask(oumask); info = fdopen(fdinfo, "w"); if (info == NULL) { syslog(LOG_ERR, "fdopen failed: %m"); nerr++; goto closeall; } xostyle = xo_get_style(NULL); xoinfo = xo_create_to_file(info, xostyle, 0); if (xoinfo == NULL) { syslog(LOG_ERR, "%s: %m", infoname); nerr++; goto closeall; } xo_open_container_h(xoinfo, "crashdump"); if (verbose) printheader(xostdout, &kdhl, device, bounds, status); printheader(xoinfo, &kdhl, device, bounds, status); xo_close_container_h(xoinfo, "crashdump"); xo_flush_h(xoinfo); xo_finish_h(xoinfo); fclose(info); syslog(LOG_NOTICE, "writing %score to %s/%s", compress ? "compressed " : "", savedir, corename); if (istextdump) { if (DoTextdumpFile(fd, dumpsize, lasthd, buf, device, corename, fp) < 0) goto closeall; } else { if (DoRegularFile(fd, dumpsize, buf, device, corename, fp) < 0) goto closeall; } if (verbose) printf("\n"); if (fclose(fp) < 0) { syslog(LOG_ERR, "error on %s: %m", corename); nerr++; goto closefd; } symlinks_remove(); if (symlink(infoname, "info.last") == -1) { syslog(LOG_WARNING, "unable to create symlink %s/%s: %m", savedir, "info.last"); } if (compress) { snprintf(linkname, sizeof(linkname), "%s.last.gz", istextdump ? "textdump.tar" : "vmcore"); } else { snprintf(linkname, sizeof(linkname), "%s.last", istextdump ? "textdump.tar" : "vmcore"); } if (symlink(corename, linkname) == -1) { syslog(LOG_WARNING, "unable to create symlink %s/%s: %m", savedir, linkname); } nsaved++; if (verbose) printf("dump saved\n"); nuke: if (!keep) { if (verbose) printf("clearing dump header\n"); - memcpy(kdhl.magic, KERNELDUMPMAGIC_CLEARED, sizeof kdhl.magic); + memcpy(kdhl.magic, KERNELDUMPMAGIC_CLEARED, sizeof(kdhl.magic)); + memcpy(temp, &kdhl, sizeof(kdhl)); if (lseek(fd, lasthd, SEEK_SET) != lasthd || - write(fd, &kdhl, sizeof(kdhl)) != sizeof(kdhl)) + write(fd, temp, sectorsize) != sectorsize) syslog(LOG_ERR, "error while clearing the dump header: %m"); } xo_close_container_h(xostdout, "crashdump"); xo_finish_h(xostdout); close(fd); return; closeall: fclose(fp); closefd: close(fd); } static void usage(void) { xo_error("%s\n%s\n%s\n", "usage: savecore -c [-v] [device ...]", " savecore -C [-v] [device ...]", " savecore [-fkvz] [-m maxdumps] [directory [device ...]]"); exit(1); } int main(int argc, char **argv) { const char *savedir = "."; struct fstab *fsp; int i, ch, error; checkfor = compress = clear = force = keep = verbose = 0; nfound = nsaved = nerr = 0; openlog("savecore", LOG_PERROR, LOG_DAEMON); signal(SIGINFO, infohandler); argc = xo_parse_args(argc, argv); if (argc < 0) exit(1); while ((ch = getopt(argc, argv, "Ccfkm:vz")) != -1) switch(ch) { case 'C': checkfor = 1; break; case 'c': clear = 1; break; case 'f': force = 1; break; case 'k': keep = 1; break; case 'm': maxdumps = atoi(optarg); if (maxdumps <= 0) { syslog(LOG_ERR, "Invalid maxdump value"); exit(1); } break; case 'v': verbose++; break; case 'z': compress = 1; break; case '?': default: usage(); } if (checkfor && (clear || force || keep)) usage(); if (clear && (compress || keep)) usage(); if (maxdumps > 0 && (checkfor || clear)) usage(); argc -= optind; argv += optind; if (argc >= 1 && !checkfor && !clear) { error = chdir(argv[0]); if (error) { syslog(LOG_ERR, "chdir(%s): %m", argv[0]); exit(1); } savedir = argv[0]; argc--; argv++; } if (argc == 0) { for (;;) { fsp = getfsent(); if (fsp == NULL) break; if (strcmp(fsp->fs_vfstype, "swap") && strcmp(fsp->fs_vfstype, "dump")) continue; DoFile(savedir, fsp->fs_spec); } } else { for (i = 0; i < argc; i++) DoFile(savedir, argv[i]); } /* Emit minimal output. */ if (nfound == 0) { if (checkfor) { if (verbose) printf("No dump exists\n"); exit(1); } if (verbose) syslog(LOG_WARNING, "no dumps found"); } else if (nsaved == 0) { if (nerr != 0) { if (verbose) syslog(LOG_WARNING, "unsaved dumps found but not saved"); exit(1); } else if (verbose) syslog(LOG_WARNING, "no unsaved dumps found"); } return (0); } static void infohandler(int sig __unused) { got_siginfo = 1; } Index: head/sys/amd64/amd64/minidump_machdep.c =================================================================== --- head/sys/amd64/amd64/minidump_machdep.c (revision 298075) +++ head/sys/amd64/amd64/minidump_machdep.c (revision 298076) @@ -1,487 +1,485 @@ /*- * Copyright (c) 2006 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_pmap.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CTASSERT(sizeof(struct kerneldumpheader) == 512); /* * Don't touch the first SIZEOF_METADATA bytes on the dump device. This * is to protect us from metadata and to protect metadata from us. */ #define SIZEOF_METADATA (64*1024) -#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) -#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1)) - uint64_t *vm_page_dump; int vm_page_dump_size; static struct kerneldumpheader kdh; static off_t dumplo; /* Handle chunked writes. */ static size_t fragsz; static void *dump_va; static size_t counter, progress, dumpsize; CTASSERT(sizeof(*vm_page_dump) == 8); static int is_dumpable(vm_paddr_t pa) { vm_page_t m; int i; if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL) return ((m->flags & PG_NODUMP) == 0); for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) return (1); } return (0); } #define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) static int blk_flush(struct dumperinfo *di) { int error; if (fragsz == 0) return (0); error = dump_write(di, dump_va, 0, dumplo, fragsz); dumplo += fragsz; fragsz = 0; return (error); } static struct { int min_per; int max_per; int visited; } progress_track[10] = { { 0, 10, 0}, { 10, 20, 0}, { 20, 30, 0}, { 30, 40, 0}, { 40, 50, 0}, { 50, 60, 0}, { 60, 70, 0}, { 70, 80, 0}, { 80, 90, 0}, { 90, 100, 0} }; static void report_progress(size_t progress, size_t dumpsize) { int sofar, i; sofar = 100 - ((progress * 100) / dumpsize); for (i = 0; i < nitems(progress_track); i++) { if (sofar < progress_track[i].min_per || sofar > progress_track[i].max_per) continue; if (progress_track[i].visited) return; progress_track[i].visited = 1; printf("..%d%%", sofar); return; } } static int blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) { size_t len; int error, i, c; u_int maxdumpsz; maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); if (maxdumpsz == 0) /* seatbelt */ maxdumpsz = PAGE_SIZE; error = 0; if ((sz % PAGE_SIZE) != 0) { printf("size not page aligned\n"); return (EINVAL); } if (ptr != NULL && pa != 0) { printf("cant have both va and pa!\n"); return (EINVAL); } if ((((uintptr_t)pa) % PAGE_SIZE) != 0) { printf("address not page aligned %p\n", ptr); return (EINVAL); } if (ptr != NULL) { /* If we're doing a virtual dump, flush any pre-existing pa pages */ error = blk_flush(di); if (error) return (error); } while (sz) { len = maxdumpsz - fragsz; if (len > sz) len = sz; counter += len; progress -= len; if (counter >> 24) { report_progress(progress, dumpsize); counter &= (1<<24) - 1; } wdog_kern_pat(WD_LASTVAL); if (ptr) { error = dump_write(di, ptr, 0, dumplo, len); if (error) return (error); dumplo += len; ptr += len; sz -= len; } else { for (i = 0; i < len; i += PAGE_SIZE) dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); fragsz += len; pa += len; sz -= len; if (fragsz == maxdumpsz) { error = blk_flush(di); if (error) return (error); } } /* Check for user abort. */ c = cncheckc(); if (c == 0x03) return (ECANCELED); if (c != -1) printf(" (CTRL-C to abort) "); } return (0); } /* A fake page table page, to avoid having to handle both 4K and 2M pages */ static pd_entry_t fakepd[NPDEPG]; int minidumpsys(struct dumperinfo *di) { uint32_t pmapsize; vm_offset_t va; int error; uint64_t bits; uint64_t *pml4, *pdp, *pd, *pt, pa; + size_t size; int i, ii, j, k, n, bit; int retry_count; struct minidumphdr mdhdr; retry_count = 0; retry: retry_count++; counter = 0; for (i = 0; i < nitems(progress_track); i++) progress_track[i].visited = 0; /* Walk page table pages, set bits in vm_page_dump */ pmapsize = 0; for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR, kernel_vm_end); ) { /* * We always write a page, even if it is zero. Each * page written corresponds to 1GB of space */ pmapsize += PAGE_SIZE; ii = (va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1); pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); if ((pdp[i] & PG_V) == 0) { va += NBPDP; continue; } /* * 1GB page is represented as 512 2MB pages in a dump. */ if ((pdp[i] & PG_PS) != 0) { va += NBPDP; pa = pdp[i] & PG_PS_FRAME; for (n = 0; n < NPDEPG * NPTEPG; n++) { if (is_dumpable(pa)) dump_add_page(pa); pa += PAGE_SIZE; } continue; } pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); for (n = 0; n < NPDEPG; n++, va += NBPDR) { j = (va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1); if ((pd[j] & PG_V) == 0) continue; if ((pd[j] & PG_PS) != 0) { /* This is an entire 2M page. */ pa = pd[j] & PG_PS_FRAME; for (k = 0; k < NPTEPG; k++) { if (is_dumpable(pa)) dump_add_page(pa); pa += PAGE_SIZE; } continue; } pa = pd[j] & PG_FRAME; /* set bit for this PTE page */ if (is_dumpable(pa)) dump_add_page(pa); /* and for each valid page in this 2MB block */ pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME); for (k = 0; k < NPTEPG; k++) { if ((pt[k] & PG_V) == 0) continue; pa = pt[k] & PG_FRAME; if (is_dumpable(pa)) dump_add_page(pa); } } } /* Calculate dump size. */ dumpsize = pmapsize; dumpsize += round_page(msgbufp->msg_size); dumpsize += round_page(vm_page_dump_size); for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { bits = vm_page_dump[i]; while (bits) { bit = bsfq(bits); pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; /* Clear out undumpable pages now if needed */ if (is_dumpable(pa)) { dumpsize += PAGE_SIZE; } else { dump_drop_page(pa); } bits &= ~(1ul << bit); } } dumpsize += PAGE_SIZE; /* Determine dump offset on device. */ - if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { + if (di->mediasize < SIZEOF_METADATA + dumpsize + di->blocksize * 2) { error = E2BIG; goto fail; } dumplo = di->mediaoffset + di->mediasize - dumpsize; - dumplo -= sizeof(kdh) * 2; + dumplo -= di->blocksize * 2; progress = dumpsize; /* Initialize mdhdr */ bzero(&mdhdr, sizeof(mdhdr)); strcpy(mdhdr.magic, MINIDUMP_MAGIC); mdhdr.version = MINIDUMP_VERSION; mdhdr.msgbufsize = msgbufp->msg_size; mdhdr.bitmapsize = vm_page_dump_size; mdhdr.pmapsize = pmapsize; mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS; mdhdr.dmapbase = DMAP_MIN_ADDRESS; mdhdr.dmapend = DMAP_MAX_ADDRESS; mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize); printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20, ptoa((uintmax_t)physmem) / 1048576); /* Dump leader */ - error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); + error = dump_write_pad(di, &kdh, 0, dumplo, sizeof(kdh), &size); if (error) goto fail; - dumplo += sizeof(kdh); + dumplo += size; /* Dump my header */ bzero(&fakepd, sizeof(fakepd)); bcopy(&mdhdr, &fakepd, sizeof(mdhdr)); error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); if (error) goto fail; /* Dump msgbuf up front */ error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); if (error) goto fail; /* Dump bitmap */ error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); if (error) goto fail; /* Dump kernel page directory pages */ bzero(fakepd, sizeof(fakepd)); for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR, kernel_vm_end); va += NBPDP) { ii = (va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1); pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); /* We always write a page, even if it is zero */ if ((pdp[i] & PG_V) == 0) { error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); if (error) goto fail; /* flush, in case we reuse fakepd in the same block */ error = blk_flush(di); if (error) goto fail; continue; } /* 1GB page is represented as 512 2MB pages in a dump */ if ((pdp[i] & PG_PS) != 0) { /* PDPE and PDP have identical layout in this case */ fakepd[0] = pdp[i]; for (j = 1; j < NPDEPG; j++) fakepd[j] = fakepd[j - 1] + NBPDR; error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); if (error) goto fail; /* flush, in case we reuse fakepd in the same block */ error = blk_flush(di); if (error) goto fail; bzero(fakepd, sizeof(fakepd)); continue; } pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); error = blk_write(di, (char *)pd, 0, PAGE_SIZE); if (error) goto fail; error = blk_flush(di); if (error) goto fail; } /* Dump memory chunks */ /* XXX cluster it up and use blk_dump() */ for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { bits = vm_page_dump[i]; while (bits) { bit = bsfq(bits); pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; error = blk_write(di, 0, pa, PAGE_SIZE); if (error) goto fail; bits &= ~(1ul << bit); } } error = blk_flush(di); if (error) goto fail; /* Dump trailer */ - error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); + error = dump_write_pad(di, &kdh, 0, dumplo, sizeof(kdh), &size); if (error) goto fail; - dumplo += sizeof(kdh); + dumplo += size; /* Signal completion, signoff and exit stage left. */ dump_write(di, NULL, 0, 0, 0); printf("\nDump complete\n"); return (0); fail: if (error < 0) error = -error; printf("\n"); if (error == ENOSPC) { printf("Dump map grown while dumping. "); if (retry_count < 5) { printf("Retrying...\n"); goto retry; } printf("Dump failed.\n"); } else if (error == ECANCELED) printf("Dump aborted\n"); else if (error == E2BIG) printf("Dump failed. Partition too small.\n"); else printf("** DUMP FAILED (ERROR %d) **\n", error); return (error); } void dump_add_page(vm_paddr_t pa) { int idx, bit; pa >>= PAGE_SHIFT; idx = pa >> 6; /* 2^6 = 64 */ bit = pa & 63; atomic_set_long(&vm_page_dump[idx], 1ul << bit); } void dump_drop_page(vm_paddr_t pa) { int idx, bit; pa >>= PAGE_SHIFT; idx = pa >> 6; /* 2^6 = 64 */ bit = pa & 63; atomic_clear_long(&vm_page_dump[idx], 1ul << bit); } Index: head/sys/kern/kern_dump.c =================================================================== --- head/sys/kern/kern_dump.c (revision 298075) +++ head/sys/kern/kern_dump.c (revision 298076) @@ -1,399 +1,397 @@ /*- * Copyright (c) 2002 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_watchdog.h" #include #include #include #include #include #include #include #ifdef SW_WATCHDOG #include #endif #include #include #include #include #include #include #include CTASSERT(sizeof(struct kerneldumpheader) == 512); /* * Don't touch the first SIZEOF_METADATA bytes on the dump device. This * is to protect us from metadata and to protect metadata from us. */ #define SIZEOF_METADATA (64*1024) -#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) -#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1)) +#define MD_ALIGN(x) roundup2((off_t)(x), PAGE_SIZE) off_t dumplo; /* Handle buffered writes. */ -static char buffer[DEV_BSIZE]; static size_t fragsz; struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS]; #if !defined(__powerpc__) && !defined(__sparc__) void dumpsys_gen_pa_init(void) { int n, idx; bzero(dump_map, sizeof(dump_map)); for (n = 0; n < sizeof(dump_map) / sizeof(dump_map[0]); n++) { idx = n * 2; if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0) break; dump_map[n].pa_start = dump_avail[idx]; dump_map[n].pa_size = dump_avail[idx + 1] - dump_avail[idx]; } } #endif struct dump_pa * dumpsys_gen_pa_next(struct dump_pa *mdp) { if (mdp == NULL) return (&dump_map[0]); mdp++; if (mdp->pa_size == 0) mdp = NULL; return (mdp); } void dumpsys_gen_wbinv_all(void) { } void dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused, size_t chunk __unused, void *va __unused) { } #if !defined(__sparc__) int dumpsys_gen_write_aux_headers(struct dumperinfo *di) { return (0); } #endif int dumpsys_buf_write(struct dumperinfo *di, char *ptr, size_t sz) { size_t len; int error; while (sz) { - len = DEV_BSIZE - fragsz; + len = di->blocksize - fragsz; if (len > sz) len = sz; - bcopy(ptr, buffer + fragsz, len); + memcpy((char *)di->blockbuf + fragsz, ptr, len); fragsz += len; ptr += len; sz -= len; - if (fragsz == DEV_BSIZE) { - error = dump_write(di, buffer, 0, dumplo, - DEV_BSIZE); + if (fragsz == di->blocksize) { + error = dump_write(di, di->blockbuf, 0, dumplo, + di->blocksize); if (error) return (error); - dumplo += DEV_BSIZE; + dumplo += di->blocksize; fragsz = 0; } } return (0); } int dumpsys_buf_flush(struct dumperinfo *di) { int error; if (fragsz == 0) return (0); - error = dump_write(di, buffer, 0, dumplo, DEV_BSIZE); - dumplo += DEV_BSIZE; + error = dump_write(di, di->blockbuf, 0, dumplo, di->blocksize); + dumplo += di->blocksize; fragsz = 0; return (error); } CTASSERT(PAGE_SHIFT < 20); #define PG2MB(pgs) ((pgs + (1 << (20 - PAGE_SHIFT)) - 1) >> (20 - PAGE_SHIFT)) int dumpsys_cb_dumpdata(struct dump_pa *mdp, int seqnr, void *arg) { struct dumperinfo *di = (struct dumperinfo*)arg; vm_paddr_t pa; void *va; uint64_t pgs; size_t counter, sz, chunk; int c, error; u_int maxdumppgs; error = 0; /* catch case in which chunk size is 0 */ counter = 0; /* Update twiddle every 16MB */ va = NULL; pgs = mdp->pa_size / PAGE_SIZE; pa = mdp->pa_start; maxdumppgs = min(di->maxiosize / PAGE_SIZE, MAXDUMPPGS); if (maxdumppgs == 0) /* seatbelt */ maxdumppgs = 1; printf(" chunk %d: %juMB (%ju pages)", seqnr, (uintmax_t)PG2MB(pgs), (uintmax_t)pgs); dumpsys_wbinv_all(); while (pgs) { chunk = pgs; if (chunk > maxdumppgs) chunk = maxdumppgs; sz = chunk << PAGE_SHIFT; counter += sz; if (counter >> 24) { printf(" %ju", (uintmax_t)PG2MB(pgs)); counter &= (1 << 24) - 1; } dumpsys_map_chunk(pa, chunk, &va); #ifdef SW_WATCHDOG wdog_kern_pat(WD_LASTVAL); #endif error = dump_write(di, va, 0, dumplo, sz); dumpsys_unmap_chunk(pa, chunk, va); if (error) break; dumplo += sz; pgs -= chunk; pa += sz; /* Check for user abort. */ c = cncheckc(); if (c == 0x03) return (ECANCELED); if (c != -1) printf(" (CTRL-C to abort) "); } printf(" ... %s\n", (error) ? "fail" : "ok"); return (error); } int dumpsys_foreach_chunk(dumpsys_callback_t cb, void *arg) { struct dump_pa *mdp; int error, seqnr; seqnr = 0; mdp = dumpsys_pa_next(NULL); while (mdp != NULL) { error = (*cb)(mdp, seqnr++, arg); if (error) return (-error); mdp = dumpsys_pa_next(mdp); } return (seqnr); } #if !defined(__sparc__) static off_t fileofs; static int cb_dumphdr(struct dump_pa *mdp, int seqnr, void *arg) { struct dumperinfo *di = (struct dumperinfo*)arg; Elf_Phdr phdr; uint64_t size; int error; size = mdp->pa_size; bzero(&phdr, sizeof(phdr)); phdr.p_type = PT_LOAD; phdr.p_flags = PF_R; /* XXX */ phdr.p_offset = fileofs; #ifdef __powerpc__ phdr.p_vaddr = (do_minidump? mdp->pa_start : ~0L); phdr.p_paddr = (do_minidump? ~0L : mdp->pa_start); #else phdr.p_vaddr = mdp->pa_start; phdr.p_paddr = mdp->pa_start; #endif phdr.p_filesz = size; phdr.p_memsz = size; phdr.p_align = PAGE_SIZE; error = dumpsys_buf_write(di, (char*)&phdr, sizeof(phdr)); fileofs += phdr.p_filesz; return (error); } static int cb_size(struct dump_pa *mdp, int seqnr, void *arg) { uint64_t *sz; sz = (uint64_t *)arg; *sz += (uint64_t)mdp->pa_size; return (0); } int dumpsys_generic(struct dumperinfo *di) { static struct kerneldumpheader kdh; Elf_Ehdr ehdr; uint64_t dumpsize; off_t hdrgap; - size_t hdrsz; + size_t hdrsz, size; int error; #ifndef __powerpc__ if (do_minidump) return (minidumpsys(di)); #endif bzero(&ehdr, sizeof(ehdr)); ehdr.e_ident[EI_MAG0] = ELFMAG0; ehdr.e_ident[EI_MAG1] = ELFMAG1; ehdr.e_ident[EI_MAG2] = ELFMAG2; ehdr.e_ident[EI_MAG3] = ELFMAG3; ehdr.e_ident[EI_CLASS] = ELF_CLASS; #if BYTE_ORDER == LITTLE_ENDIAN ehdr.e_ident[EI_DATA] = ELFDATA2LSB; #else ehdr.e_ident[EI_DATA] = ELFDATA2MSB; #endif ehdr.e_ident[EI_VERSION] = EV_CURRENT; ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE; /* XXX big picture? */ ehdr.e_type = ET_CORE; ehdr.e_machine = EM_VALUE; ehdr.e_phoff = sizeof(ehdr); ehdr.e_flags = 0; ehdr.e_ehsize = sizeof(ehdr); ehdr.e_phentsize = sizeof(Elf_Phdr); ehdr.e_shentsize = sizeof(Elf_Shdr); dumpsys_pa_init(); /* Calculate dump size. */ dumpsize = 0L; ehdr.e_phnum = dumpsys_foreach_chunk(cb_size, &dumpsize) + DUMPSYS_NUM_AUX_HDRS; hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize; fileofs = MD_ALIGN(hdrsz); dumpsize += fileofs; - hdrgap = fileofs - DEV_ALIGN(hdrsz); + hdrgap = fileofs - roundup2((off_t)hdrsz, di->blocksize); /* Determine dump offset on device. */ - if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { + if (di->mediasize < SIZEOF_METADATA + dumpsize + di->blocksize * 2) { error = ENOSPC; goto fail; } dumplo = di->mediaoffset + di->mediasize - dumpsize; - dumplo -= sizeof(kdh) * 2; + dumplo -= di->blocksize * 2; mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION, dumpsize, di->blocksize); printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20, ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS); /* Dump leader */ - error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); + error = dump_write_pad(di, &kdh, 0, dumplo, sizeof(kdh), &size); if (error) goto fail; - dumplo += sizeof(kdh); + dumplo += size; /* Dump ELF header */ error = dumpsys_buf_write(di, (char*)&ehdr, sizeof(ehdr)); if (error) goto fail; /* Dump program headers */ error = dumpsys_foreach_chunk(cb_dumphdr, di); if (error < 0) goto fail; error = dumpsys_write_aux_headers(di); if (error < 0) goto fail; dumpsys_buf_flush(di); /* * All headers are written using blocked I/O, so we know the * current offset is (still) block aligned. Skip the alignement * in the file to have the segment contents aligned at page * boundary. We cannot use MD_ALIGN on dumplo, because we don't * care and may very well be unaligned within the dump device. */ dumplo += hdrgap; /* Dump memory chunks (updates dumplo) */ error = dumpsys_foreach_chunk(dumpsys_cb_dumpdata, di); if (error < 0) goto fail; /* Dump trailer */ - error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); + error = dump_write_pad(di, &kdh, 0, dumplo, sizeof(kdh), &size); if (error) goto fail; /* Signal completion, signoff and exit stage left. */ dump_write(di, NULL, 0, 0, 0); printf("\nDump complete\n"); return (0); fail: if (error < 0) error = -error; if (error == ECANCELED) printf("\nDump aborted\n"); else if (error == ENOSPC) printf("\nDump failed. Partition too small.\n"); else printf("\n** DUMP FAILED (ERROR %d) **\n", error); return (error); } #endif Index: head/sys/kern/kern_shutdown.c =================================================================== --- head/sys/kern/kern_shutdown.c (revision 298075) +++ head/sys/kern/kern_shutdown.c (revision 298076) @@ -1,901 +1,931 @@ /*- * Copyright (c) 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_kdb.h" #include "opt_panic.h" #include "opt_sched.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer"); + #ifndef PANIC_REBOOT_WAIT_TIME #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ #endif static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME; SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN, &panic_reboot_wait_time, 0, "Seconds to wait before rebooting after a panic"); /* * Note that stdarg.h and the ANSI style va_start macro is used for both * ANSI and traditional C compilers. */ #include #ifdef KDB #ifdef KDB_UNATTENDED int debugger_on_panic = 0; #else int debugger_on_panic = 1; #endif SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RWTUN | CTLFLAG_SECURE, &debugger_on_panic, 0, "Run debugger on kernel panic"); #ifdef KDB_TRACE static int trace_on_panic = 1; #else static int trace_on_panic = 0; #endif SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, CTLFLAG_RWTUN | CTLFLAG_SECURE, &trace_on_panic, 0, "Print stack trace on kernel panic"); #endif /* KDB */ static int sync_on_panic = 0; SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN, &sync_on_panic, 0, "Do a sync before rebooting from a panic"); static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment"); #ifndef DIAGNOSTIC static int show_busybufs; #else static int show_busybufs = 1; #endif SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW, &show_busybufs, 0, ""); int suspend_blocked = 0; SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW, &suspend_blocked, 0, "Block suspend due to a pending shutdown"); /* * Variable panicstr contains argument to first call to panic; used as flag * to indicate that the kernel has already called panic. */ const char *panicstr; int dumping; /* system is dumping */ int rebooting; /* system is rebooting */ static struct dumperinfo dumper; /* our selected dumper */ /* Context information for dump-debuggers. */ static struct pcb dumppcb; /* Registers. */ lwpid_t dumptid; /* Thread ID. */ static struct cdevsw reroot_cdevsw = { .d_version = D_VERSION, .d_name = "reroot", }; static void poweroff_wait(void *, int); static void shutdown_halt(void *junk, int howto); static void shutdown_panic(void *junk, int howto); static void shutdown_reset(void *junk, int howto); static int kern_reroot(void); /* register various local shutdown events */ static void shutdown_conf(void *unused) { EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, SHUTDOWN_PRI_FIRST); EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, SHUTDOWN_PRI_LAST + 100); EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, SHUTDOWN_PRI_LAST + 100); EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, SHUTDOWN_PRI_LAST + 200); } SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); /* * The only reason this exists is to create the /dev/reroot/ directory, * used by reroot code in init(8) as a mountpoint for tmpfs. */ static void reroot_conf(void *unused) { int error; struct cdev *cdev; error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); if (error != 0) { printf("%s: failed to create device node, error %d", __func__, error); } } SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); /* * The system call that results in a reboot. */ /* ARGSUSED */ int sys_reboot(struct thread *td, struct reboot_args *uap) { int error; error = 0; #ifdef MAC error = mac_system_check_reboot(td->td_ucred, uap->opt); #endif if (error == 0) error = priv_check(td, PRIV_REBOOT); if (error == 0) { if (uap->opt & RB_REROOT) { error = kern_reroot(); } else { mtx_lock(&Giant); kern_reboot(uap->opt); mtx_unlock(&Giant); } } return (error); } /* * Called by events that want to shut down.. e.g on a PC */ void shutdown_nice(int howto) { if (initproc != NULL) { /* Send a signal to init(8) and have it shutdown the world. */ PROC_LOCK(initproc); if (howto & RB_POWEROFF) kern_psignal(initproc, SIGUSR2); else if (howto & RB_HALT) kern_psignal(initproc, SIGUSR1); else kern_psignal(initproc, SIGINT); PROC_UNLOCK(initproc); } else { /* No init(8) running, so simply reboot. */ kern_reboot(howto | RB_NOSYNC); } } static void print_uptime(void) { int f; struct timespec ts; getnanouptime(&ts); printf("Uptime: "); f = 0; if (ts.tv_sec >= 86400) { printf("%ldd", (long)ts.tv_sec / 86400); ts.tv_sec %= 86400; f = 1; } if (f || ts.tv_sec >= 3600) { printf("%ldh", (long)ts.tv_sec / 3600); ts.tv_sec %= 3600; f = 1; } if (f || ts.tv_sec >= 60) { printf("%ldm", (long)ts.tv_sec / 60); ts.tv_sec %= 60; f = 1; } printf("%lds\n", (long)ts.tv_sec); } int doadump(boolean_t textdump) { boolean_t coredump; int error; error = 0; if (dumping) return (EBUSY); if (dumper.dumper == NULL) return (ENXIO); savectx(&dumppcb); dumptid = curthread->td_tid; dumping++; coredump = TRUE; #ifdef DDB if (textdump && textdump_pending) { coredump = FALSE; textdump_dumpsys(&dumper); } #endif if (coredump) error = dumpsys(&dumper); dumping--; return (error); } /* * Shutdown the system cleanly to prepare for reboot, halt, or power off. */ void kern_reboot(int howto) { static int once = 0; #if defined(SMP) /* * Bind us to CPU 0 so that all shutdown code runs there. Some * systems don't shutdown properly (i.e., ACPI power off) if we * run on another processor. */ if (!SCHEDULER_STOPPED()) { thread_lock(curthread); sched_bind(curthread, 0); thread_unlock(curthread); KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0")); } #endif /* We're in the process of rebooting. */ rebooting = 1; /* We are out of the debugger now. */ kdb_active = 0; /* * Do any callouts that should be done BEFORE syncing the filesystems. */ EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); /* * Now sync filesystems */ if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) { once = 1; bufshutdown(show_busybufs); } print_uptime(); cngrab(); /* * Ok, now do things that assume all filesystem activity has * been completed. */ EVENTHANDLER_INVOKE(shutdown_post_sync, howto); if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) doadump(TRUE); /* Now that we're going to really halt the system... */ EVENTHANDLER_INVOKE(shutdown_final, howto); for(;;) ; /* safety against shutdown_reset not working */ /* NOTREACHED */ } /* * The system call that results in changing the rootfs. */ static int kern_reroot(void) { struct vnode *oldrootvnode, *vp; struct mount *mp, *devmp; int error; if (curproc != initproc) return (EPERM); /* * Mark the filesystem containing currently-running executable * (the temporary copy of init(8)) busy. */ vp = curproc->p_textvp; error = vn_lock(vp, LK_SHARED); if (error != 0) return (error); mp = vp->v_mount; error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) { vfs_ref(mp); VOP_UNLOCK(vp, 0); error = vfs_busy(mp, 0); vn_lock(vp, LK_SHARED | LK_RETRY); vfs_rel(mp); if (error != 0) { VOP_UNLOCK(vp, 0); return (ENOENT); } if (vp->v_iflag & VI_DOOMED) { VOP_UNLOCK(vp, 0); vfs_unbusy(mp); return (ENOENT); } } VOP_UNLOCK(vp, 0); /* * Remove the filesystem containing currently-running executable * from the mount list, to prevent it from being unmounted * by vfs_unmountall(), and to avoid confusing vfs_mountroot(). * * Also preserve /dev - forcibly unmounting it could cause driver * reinitialization. */ vfs_ref(rootdevmp); devmp = rootdevmp; rootdevmp = NULL; mtx_lock(&mountlist_mtx); TAILQ_REMOVE(&mountlist, mp, mnt_list); TAILQ_REMOVE(&mountlist, devmp, mnt_list); mtx_unlock(&mountlist_mtx); oldrootvnode = rootvnode; /* * Unmount everything except for the two filesystems preserved above. */ vfs_unmountall(); /* * Add /dev back; vfs_mountroot() will move it into its new place. */ mtx_lock(&mountlist_mtx); TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list); mtx_unlock(&mountlist_mtx); rootdevmp = devmp; vfs_rel(rootdevmp); /* * Mount the new rootfs. */ vfs_mountroot(); /* * Update all references to the old rootvnode. */ mountcheckdirs(oldrootvnode, rootvnode); /* * Add the temporary filesystem back and unbusy it. */ mtx_lock(&mountlist_mtx); TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); vfs_unbusy(mp); return (0); } /* * If the shutdown was a clean halt, behave accordingly. */ static void shutdown_halt(void *junk, int howto) { if (howto & RB_HALT) { printf("\n"); printf("The operating system has halted.\n"); printf("Please press any key to reboot.\n\n"); switch (cngetc()) { case -1: /* No console, just die */ cpu_halt(); /* NOTREACHED */ default: howto &= ~RB_HALT; break; } } } /* * Check to see if the system paniced, pause and then reboot * according to the specified delay. */ static void shutdown_panic(void *junk, int howto) { int loop; if (howto & RB_DUMP) { if (panic_reboot_wait_time != 0) { if (panic_reboot_wait_time != -1) { printf("Automatic reboot in %d seconds - " "press a key on the console to abort\n", panic_reboot_wait_time); for (loop = panic_reboot_wait_time * 10; loop > 0; --loop) { DELAY(1000 * 100); /* 1/10th second */ /* Did user type a key? */ if (cncheckc() != -1) break; } if (!loop) return; } } else { /* zero time specified - reboot NOW */ return; } printf("--> Press a key on the console to reboot,\n"); printf("--> or switch off the system now.\n"); cngetc(); } } /* * Everything done, now reset */ static void shutdown_reset(void *junk, int howto) { printf("Rebooting...\n"); DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ /* * Acquiring smp_ipi_mtx here has a double effect: * - it disables interrupts avoiding CPU0 preemption * by fast handlers (thus deadlocking against other CPUs) * - it avoids deadlocks against smp_rendezvous() or, more * generally, threads busy-waiting, with this spinlock held, * and waiting for responses by threads on other CPUs * (ie. smp_tlb_shootdown()). * * For the !SMP case it just needs to handle the former problem. */ #ifdef SMP mtx_lock_spin(&smp_ipi_mtx); #else spinlock_enter(); #endif /* cpu_boot(howto); */ /* doesn't do anything at the moment */ cpu_reset(); /* NOTREACHED */ /* assuming reset worked */ } #if defined(WITNESS) || defined(INVARIANTS) static int kassert_warn_only = 0; #ifdef KDB static int kassert_do_kdb = 0; #endif #ifdef KTR static int kassert_do_ktr = 0; #endif static int kassert_do_log = 1; static int kassert_log_pps_limit = 4; static int kassert_log_mute_at = 0; static int kassert_log_panic_at = 0; static int kassert_warnings = 0; SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW, NULL, "kassert options"); SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, CTLFLAG_RWTUN, &kassert_warn_only, 0, "KASSERT triggers a panic (1) or just a warning (0)"); #ifdef KDB SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, CTLFLAG_RWTUN, &kassert_do_kdb, 0, "KASSERT will enter the debugger"); #endif #ifdef KTR SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, CTLFLAG_RWTUN, &kassert_do_ktr, 0, "KASSERT does a KTR, set this to the KTRMASK you want"); #endif SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, CTLFLAG_RWTUN, &kassert_do_log, 0, "KASSERT triggers a panic (1) or just a warning (0)"); SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RWTUN, &kassert_warnings, 0, "number of KASSERTs that have been triggered"); SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, CTLFLAG_RWTUN, &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic"); SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, CTLFLAG_RWTUN, &kassert_log_pps_limit, 0, "limit number of log messages per second"); SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, CTLFLAG_RWTUN, &kassert_log_mute_at, 0, "max number of KASSERTS to log"); static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0, kassert_sysctl_kassert, "I", "set to trigger a test kassert"); static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS) { int error, i; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error == 0) { i = 0; error = sysctl_handle_int(oidp, &i, 0, req); } if (error != 0 || req->newptr == NULL) return (error); KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i)); return (0); } /* * Called by KASSERT, this decides if we will panic * or if we will log via printf and/or ktr. */ void kassert_panic(const char *fmt, ...) { static char buf[256]; va_list ap; va_start(ap, fmt); (void)vsnprintf(buf, sizeof(buf), fmt, ap); va_end(ap); /* * panic if we're not just warning, or if we've exceeded * kassert_log_panic_at warnings. */ if (!kassert_warn_only || (kassert_log_panic_at > 0 && kassert_warnings >= kassert_log_panic_at)) { va_start(ap, fmt); vpanic(fmt, ap); /* NORETURN */ } #ifdef KTR if (kassert_do_ktr) CTR0(ktr_mask, buf); #endif /* KTR */ /* * log if we've not yet met the mute limit. */ if (kassert_do_log && (kassert_log_mute_at == 0 || kassert_warnings < kassert_log_mute_at)) { static struct timeval lasterr; static int curerr; if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) { printf("KASSERT failed: %s\n", buf); kdb_backtrace(); } } #ifdef KDB if (kassert_do_kdb) { kdb_enter(KDB_WHY_KASSERT, buf); } #endif atomic_add_int(&kassert_warnings, 1); } #endif /* * Panic is called on unresolvable fatal errors. It prints "panic: mesg", * and then reboots. If we are called twice, then we avoid trying to sync * the disks as this often leads to recursive panics. */ void panic(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vpanic(fmt, ap); } void vpanic(const char *fmt, va_list ap) { #ifdef SMP cpuset_t other_cpus; #endif struct thread *td = curthread; int bootopt, newpanic; static char buf[256]; spinlock_enter(); #ifdef SMP /* * stop_cpus_hard(other_cpus) should prevent multiple CPUs from * concurrently entering panic. Only the winner will proceed * further. */ if (panicstr == NULL && !kdb_active) { other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); stop_cpus_hard(other_cpus); } /* * Ensure that the scheduler is stopped while panicking, even if panic * has been entered from kdb. */ td->td_stopsched = 1; #endif bootopt = RB_AUTOBOOT; newpanic = 0; if (panicstr) bootopt |= RB_NOSYNC; else { bootopt |= RB_DUMP; panicstr = fmt; newpanic = 1; } if (newpanic) { (void)vsnprintf(buf, sizeof(buf), fmt, ap); panicstr = buf; cngrab(); printf("panic: %s\n", buf); } else { printf("panic: "); vprintf(fmt, ap); printf("\n"); } #ifdef SMP printf("cpuid = %d\n", PCPU_GET(cpuid)); #endif #ifdef KDB if (newpanic && trace_on_panic) kdb_backtrace(); if (debugger_on_panic) kdb_enter(KDB_WHY_PANIC, "panic"); #endif /*thread_lock(td); */ td->td_flags |= TDF_INPANIC; /* thread_unlock(td); */ if (!sync_on_panic) bootopt |= RB_NOSYNC; kern_reboot(bootopt); } /* * Support for poweroff delay. * * Please note that setting this delay too short might power off your machine * before the write cache on your hard disk has been flushed, leading to * soft-updates inconsistencies. */ #ifndef POWEROFF_DELAY # define POWEROFF_DELAY 5000 #endif static int poweroff_delay = POWEROFF_DELAY; SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)"); static void poweroff_wait(void *junk, int howto) { if (!(howto & RB_POWEROFF) || poweroff_delay <= 0) return; DELAY(poweroff_delay * 1000); } /* * Some system processes (e.g. syncer) need to be stopped at appropriate * points in their main loops prior to a system shutdown, so that they * won't interfere with the shutdown process (e.g. by holding a disk buf * to cause sync to fail). For each of these system processes, register * shutdown_kproc() as a handler for one of shutdown events. */ static int kproc_shutdown_wait = 60; SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process"); void kproc_shutdown(void *arg, int howto) { struct proc *p; int error; if (panicstr) return; p = (struct proc *)arg; printf("Waiting (max %d seconds) for system process `%s' to stop...", kproc_shutdown_wait, p->p_comm); error = kproc_suspend(p, kproc_shutdown_wait * hz); if (error == EWOULDBLOCK) printf("timed out\n"); else printf("done\n"); } void kthread_shutdown(void *arg, int howto) { struct thread *td; int error; if (panicstr) return; td = (struct thread *)arg; printf("Waiting (max %d seconds) for system thread `%s' to stop...", kproc_shutdown_wait, td->td_name); error = kthread_suspend(td, kproc_shutdown_wait * hz); if (error == EWOULDBLOCK) printf("timed out\n"); else printf("done\n"); } static char dumpdevname[sizeof(((struct cdev*)NULL)->si_name)]; SYSCTL_STRING(_kern_shutdown, OID_AUTO, dumpdevname, CTLFLAG_RD, dumpdevname, 0, "Device for kernel dumps"); /* Registration of dumpers */ int set_dumper(struct dumperinfo *di, const char *devname, struct thread *td) { size_t wantcopy; int error; error = priv_check(td, PRIV_SETDUMPER); if (error != 0) return (error); if (di == NULL) { - bzero(&dumper, sizeof dumper); + if (dumper.blockbuf != NULL) + free(dumper.blockbuf, M_DUMPER); + bzero(&dumper, sizeof(dumper)); dumpdevname[0] = '\0'; return (0); } if (dumper.dumper != NULL) return (EBUSY); dumper = *di; wantcopy = strlcpy(dumpdevname, devname, sizeof(dumpdevname)); if (wantcopy >= sizeof(dumpdevname)) { printf("set_dumper: device name truncated from '%s' -> '%s'\n", devname, dumpdevname); } + dumper.blockbuf = malloc(di->blocksize, M_DUMPER, M_WAITOK | M_ZERO); return (0); } /* Call dumper with bounds checking. */ int dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, off_t offset, size_t length) { if (length != 0 && (offset < di->mediaoffset || offset - di->mediaoffset + length > di->mediasize)) { printf("Attempt to write outside dump device boundaries.\n" "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", (intmax_t)offset, (intmax_t)di->mediaoffset, (uintmax_t)length, (intmax_t)di->mediasize); return (ENOSPC); } return (di->dumper(di->priv, virtual, physical, offset, length)); } + +/* Call dumper with bounds checking. */ +int +dump_write_pad(struct dumperinfo *di, void *virtual, vm_offset_t physical, + off_t offset, size_t length, size_t *size) +{ + char *temp; + int ret; + + if (length > di->blocksize) + return (ENOMEM); + + *size = di->blocksize; + if (length == di->blocksize) + temp = virtual; + else { + temp = di->blockbuf; + memset(temp + length, 0, di->blocksize - length); + memcpy(temp, virtual, length); + } + ret = dump_write(di, temp, physical, offset, *size); + + return (ret); +} + void mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver, uint64_t dumplen, uint32_t blksz) { bzero(kdh, sizeof(*kdh)); strlcpy(kdh->magic, magic, sizeof(kdh->magic)); strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); kdh->version = htod32(KERNELDUMPVERSION); kdh->architectureversion = htod32(archver); kdh->dumplength = htod64(dumplen); kdh->dumptime = htod64(time_second); kdh->blocksize = htod32(blksz); strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); strlcpy(kdh->versionstring, version, sizeof(kdh->versionstring)); if (panicstr != NULL) strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); kdh->parity = kerneldump_parity(kdh); } Index: head/sys/sys/conf.h =================================================================== --- head/sys/sys/conf.h (revision 298075) +++ head/sys/sys/conf.h (revision 298076) @@ -1,345 +1,348 @@ /*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 2000 * Poul-Henning Kamp. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)conf.h 8.5 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _SYS_CONF_H_ #define _SYS_CONF_H_ #ifdef _KERNEL #include #else #include #endif struct snapdata; struct devfs_dirent; struct cdevsw; struct file; struct cdev { void *si_spare0; u_int si_flags; #define SI_ETERNAL 0x0001 /* never destroyed */ #define SI_ALIAS 0x0002 /* carrier of alias name */ #define SI_NAMED 0x0004 /* make_dev{_alias} has been called */ #define SI_CHEAPCLONE 0x0008 /* can be removed_dev'ed when vnode reclaims */ #define SI_CHILD 0x0010 /* child of another struct cdev **/ #define SI_DUMPDEV 0x0080 /* is kernel dumpdev */ #define SI_CLONELIST 0x0200 /* on a clone list */ #define SI_UNMAPPED 0x0400 /* can handle unmapped I/O */ #define SI_NOSPLIT 0x0800 /* I/O should not be split up */ struct timespec si_atime; struct timespec si_ctime; struct timespec si_mtime; uid_t si_uid; gid_t si_gid; mode_t si_mode; struct ucred *si_cred; /* cached clone-time credential */ int si_drv0; int si_refcount; LIST_ENTRY(cdev) si_list; LIST_ENTRY(cdev) si_clone; LIST_HEAD(, cdev) si_children; LIST_ENTRY(cdev) si_siblings; struct cdev *si_parent; struct mount *si_mountpt; void *si_drv1, *si_drv2; struct cdevsw *si_devsw; int si_iosize_max; /* maximum I/O size (for physio &al) */ u_long si_usecount; u_long si_threadcount; union { struct snapdata *__sid_snapdata; } __si_u; char si_name[SPECNAMELEN + 1]; }; #define si_snapdata __si_u.__sid_snapdata #ifdef _KERNEL /* * Definitions of device driver entry switches */ struct bio; struct buf; struct thread; struct uio; struct knote; struct clonedevs; struct vm_object; struct vnode; typedef int d_open_t(struct cdev *dev, int oflags, int devtype, struct thread *td); typedef int d_fdopen_t(struct cdev *dev, int oflags, struct thread *td, struct file *fp); typedef int d_close_t(struct cdev *dev, int fflag, int devtype, struct thread *td); typedef void d_strategy_t(struct bio *bp); typedef int d_ioctl_t(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td); typedef int d_read_t(struct cdev *dev, struct uio *uio, int ioflag); typedef int d_write_t(struct cdev *dev, struct uio *uio, int ioflag); typedef int d_poll_t(struct cdev *dev, int events, struct thread *td); typedef int d_kqfilter_t(struct cdev *dev, struct knote *kn); typedef int d_mmap_t(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr); typedef int d_mmap_single_t(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, struct vm_object **object, int nprot); typedef void d_purge_t(struct cdev *dev); typedef int dumper_t( void *_priv, /* Private to the driver. */ void *_virtual, /* Virtual (mapped) address. */ vm_offset_t _physical, /* Physical address of virtual. */ off_t _offset, /* Byte-offset to write at. */ size_t _length); /* Number of bytes to dump. */ #endif /* _KERNEL */ /* * Types for d_flags. */ #define D_TAPE 0x0001 #define D_DISK 0x0002 #define D_TTY 0x0004 #define D_MEM 0x0008 #ifdef _KERNEL #define D_TYPEMASK 0xffff /* * Flags for d_flags which the drivers can set. */ #define D_TRACKCLOSE 0x00080000 /* track all closes */ #define D_MMAP_ANON 0x00100000 /* special treatment in vm_mmap.c */ #define D_NEEDGIANT 0x00400000 /* driver want Giant */ #define D_NEEDMINOR 0x00800000 /* driver uses clone_create() */ /* * Version numbers. */ #define D_VERSION_00 0x20011966 #define D_VERSION_01 0x17032005 /* Add d_uid,gid,mode & kind */ #define D_VERSION_02 0x28042009 /* Add d_mmap_single */ #define D_VERSION_03 0x17122009 /* d_mmap takes memattr,vm_ooffset_t */ #define D_VERSION D_VERSION_03 /* * Flags used for internal housekeeping */ #define D_INIT 0x80000000 /* cdevsw initialized */ /* * Character device switch table */ struct cdevsw { int d_version; u_int d_flags; const char *d_name; d_open_t *d_open; d_fdopen_t *d_fdopen; d_close_t *d_close; d_read_t *d_read; d_write_t *d_write; d_ioctl_t *d_ioctl; d_poll_t *d_poll; d_mmap_t *d_mmap; d_strategy_t *d_strategy; dumper_t *d_dump; d_kqfilter_t *d_kqfilter; d_purge_t *d_purge; d_mmap_single_t *d_mmap_single; int32_t d_spare0[3]; void *d_spare1[3]; /* These fields should not be messed with by drivers */ LIST_HEAD(, cdev) d_devs; int d_spare2; union { struct cdevsw *gianttrick; SLIST_ENTRY(cdevsw) postfree_list; } __d_giant; }; #define d_gianttrick __d_giant.gianttrick #define d_postfree_list __d_giant.postfree_list struct module; struct devsw_module_data { int (*chainevh)(struct module *, int, void *); /* next handler */ void *chainarg; /* arg for next event handler */ /* Do not initialize fields hereafter */ }; #define DEV_MODULE_ORDERED(name, evh, arg, ord) \ static moduledata_t name##_mod = { \ #name, \ evh, \ arg \ }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, ord) #define DEV_MODULE(name, evh, arg) \ DEV_MODULE_ORDERED(name, evh, arg, SI_ORDER_MIDDLE) void clone_setup(struct clonedevs **cdp); void clone_cleanup(struct clonedevs **); #define CLONE_UNITMASK 0xfffff #define CLONE_FLAG0 (CLONE_UNITMASK + 1) int clone_create(struct clonedevs **, struct cdevsw *, int *unit, struct cdev **dev, int extra); #define MAKEDEV_REF 0x01 #define MAKEDEV_WHTOUT 0x02 #define MAKEDEV_NOWAIT 0x04 #define MAKEDEV_WAITOK 0x08 #define MAKEDEV_ETERNAL 0x10 #define MAKEDEV_CHECKNAME 0x20 struct make_dev_args { size_t mda_size; int mda_flags; struct cdevsw *mda_devsw; struct ucred *mda_cr; uid_t mda_uid; gid_t mda_gid; int mda_mode; int mda_unit; void *mda_si_drv1; void *mda_si_drv2; }; void make_dev_args_init_impl(struct make_dev_args *_args, size_t _sz); #define make_dev_args_init(a) \ make_dev_args_init_impl((a), sizeof(struct make_dev_args)) int count_dev(struct cdev *_dev); void delist_dev(struct cdev *_dev); void destroy_dev(struct cdev *_dev); int destroy_dev_sched(struct cdev *dev); int destroy_dev_sched_cb(struct cdev *dev, void (*cb)(void *), void *arg); void destroy_dev_drain(struct cdevsw *csw); void drain_dev_clone_events(void); struct cdevsw *dev_refthread(struct cdev *_dev, int *_ref); struct cdevsw *devvn_refthread(struct vnode *vp, struct cdev **devp, int *_ref); void dev_relthread(struct cdev *_dev, int _ref); void dev_depends(struct cdev *_pdev, struct cdev *_cdev); void dev_ref(struct cdev *dev); void dev_refl(struct cdev *dev); void dev_rel(struct cdev *dev); struct cdev *make_dev(struct cdevsw *_devsw, int _unit, uid_t _uid, gid_t _gid, int _perms, const char *_fmt, ...) __printflike(6, 7); struct cdev *make_dev_cred(struct cdevsw *_devsw, int _unit, struct ucred *_cr, uid_t _uid, gid_t _gid, int _perms, const char *_fmt, ...) __printflike(7, 8); struct cdev *make_dev_credf(int _flags, struct cdevsw *_devsw, int _unit, struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode, const char *_fmt, ...) __printflike(8, 9); int make_dev_p(int _flags, struct cdev **_cdev, struct cdevsw *_devsw, struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode, const char *_fmt, ...) __printflike(8, 9); int make_dev_s(struct make_dev_args *_args, struct cdev **_cdev, const char *_fmt, ...) __printflike(3, 4); struct cdev *make_dev_alias(struct cdev *_pdev, const char *_fmt, ...) __printflike(2, 3); int make_dev_alias_p(int _flags, struct cdev **_cdev, struct cdev *_pdev, const char *_fmt, ...) __printflike(4, 5); int make_dev_physpath_alias(int _flags, struct cdev **_cdev, struct cdev *_pdev, struct cdev *_old_alias, const char *_physpath); void dev_lock(void); void dev_unlock(void); void setconf(void); #ifdef KLD_MODULE #define MAKEDEV_ETERNAL_KLD 0 #else #define MAKEDEV_ETERNAL_KLD MAKEDEV_ETERNAL #endif #define dev2unit(d) ((d)->si_drv0) typedef void d_priv_dtor_t(void *data); int devfs_get_cdevpriv(void **datap); int devfs_set_cdevpriv(void *priv, d_priv_dtor_t *dtr); void devfs_clear_cdevpriv(void); ino_t devfs_alloc_cdp_inode(void); void devfs_free_cdp_inode(ino_t ino); #define UID_ROOT 0 #define UID_BIN 3 #define UID_UUCP 66 #define UID_NOBODY 65534 #define GID_WHEEL 0 #define GID_KMEM 2 #define GID_TTY 4 #define GID_OPERATOR 5 #define GID_BIN 7 #define GID_GAMES 13 #define GID_VIDEO 44 #define GID_DIALER 68 #define GID_NOBODY 65534 typedef void (*dev_clone_fn)(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **result); int dev_stdclone(char *_name, char **_namep, const char *_stem, int *_unit); EVENTHANDLER_DECLARE(dev_clone, dev_clone_fn); /* Stuff relating to kernel-dump */ struct dumperinfo { dumper_t *dumper; /* Dumping function. */ - void *priv; /* Private parts. */ - u_int blocksize; /* Size of block in bytes. */ + void *priv; /* Private parts. */ + u_int blocksize; /* Size of block in bytes. */ u_int maxiosize; /* Max size allowed for an individual I/O */ - off_t mediaoffset; /* Initial offset in bytes. */ - off_t mediasize; /* Space available in bytes. */ + off_t mediaoffset; /* Initial offset in bytes. */ + off_t mediasize; /* Space available in bytes. */ + void *blockbuf; /* Buffer for padding shorter dump blocks */ }; int set_dumper(struct dumperinfo *, const char *_devname, struct thread *td); int dump_write(struct dumperinfo *, void *, vm_offset_t, off_t, size_t); +int dump_write_pad(struct dumperinfo *, void *, vm_offset_t, off_t, size_t, + size_t *); int doadump(boolean_t); extern int dumping; /* system is dumping */ #endif /* _KERNEL */ #endif /* !_SYS_CONF_H_ */