Index: head/sys/kern/kern_shutdown.c =================================================================== --- head/sys/kern/kern_shutdown.c (revision 325784) +++ head/sys/kern/kern_shutdown.c (revision 325785) @@ -1,1487 +1,1499 @@ /*- * Copyright (c) 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_ekcd.h" #include "opt_gzio.h" #include "opt_kdb.h" #include "opt_panic.h" #include "opt_sched.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer"); #ifndef PANIC_REBOOT_WAIT_TIME #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ #endif static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME; SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN, &panic_reboot_wait_time, 0, "Seconds to wait before rebooting after a panic"); /* * Note that stdarg.h and the ANSI style va_start macro is used for both * ANSI and traditional C compilers. 
*/ #include #ifdef KDB #ifdef KDB_UNATTENDED int debugger_on_panic = 0; #else int debugger_on_panic = 1; #endif SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RWTUN | CTLFLAG_SECURE, &debugger_on_panic, 0, "Run debugger on kernel panic"); #ifdef KDB_TRACE static int trace_on_panic = 1; #else static int trace_on_panic = 0; #endif SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, CTLFLAG_RWTUN | CTLFLAG_SECURE, &trace_on_panic, 0, "Print stack trace on kernel panic"); #endif /* KDB */ static int sync_on_panic = 0; SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN, &sync_on_panic, 0, "Do a sync before rebooting from a panic"); +static bool poweroff_on_panic = 0; +SYSCTL_BOOL(_kern, OID_AUTO, poweroff_on_panic, CTLFLAG_RWTUN, + &poweroff_on_panic, 0, "Do a power off instead of a reboot on a panic"); + +static bool powercycle_on_panic = 0; +SYSCTL_BOOL(_kern, OID_AUTO, powercycle_on_panic, CTLFLAG_RWTUN, + &powercycle_on_panic, 0, "Do a power cycle instead of a reboot on a panic"); + static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment"); #ifndef DIAGNOSTIC static int show_busybufs; #else static int show_busybufs = 1; #endif SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW, &show_busybufs, 0, ""); int suspend_blocked = 0; SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW, &suspend_blocked, 0, "Block suspend due to a pending shutdown"); #ifdef EKCD FEATURE(ekcd, "Encrypted kernel crash dumps support"); MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data"); struct kerneldumpcrypto { uint8_t kdc_encryption; uint8_t kdc_iv[KERNELDUMP_IV_MAX_SIZE]; keyInstance kdc_ki; cipherInstance kdc_ci; uint32_t kdc_dumpkeysize; struct kerneldumpkey kdc_dumpkey[]; }; #endif #ifdef GZIO struct kerneldumpgz { struct gzio_stream *kdgz_stream; uint8_t *kdgz_buf; size_t kdgz_resid; }; static struct kerneldumpgz *kerneldumpgz_create(struct dumperinfo *di, uint8_t compression); static void kerneldumpgz_destroy(struct 
dumperinfo *di); static int kerneldumpgz_write_cb(void *cb, size_t len, off_t off, void *arg); static int kerneldump_gzlevel = 6; SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN, &kerneldump_gzlevel, 0, "Kernel crash dump gzip compression level"); #endif /* GZIO */ /* * Variable panicstr contains argument to first call to panic; used as flag * to indicate that the kernel has already called panic. */ const char *panicstr; int dumping; /* system is dumping */ int rebooting; /* system is rebooting */ static struct dumperinfo dumper; /* our selected dumper */ /* Context information for dump-debuggers. */ static struct pcb dumppcb; /* Registers. */ lwpid_t dumptid; /* Thread ID. */ static struct cdevsw reroot_cdevsw = { .d_version = D_VERSION, .d_name = "reroot", }; static void poweroff_wait(void *, int); static void shutdown_halt(void *junk, int howto); static void shutdown_panic(void *junk, int howto); static void shutdown_reset(void *junk, int howto); static int kern_reroot(void); /* register various local shutdown events */ static void shutdown_conf(void *unused) { EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, SHUTDOWN_PRI_FIRST); EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, SHUTDOWN_PRI_LAST + 100); EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, SHUTDOWN_PRI_LAST + 100); EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, SHUTDOWN_PRI_LAST + 200); } SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); /* * The only reason this exists is to create the /dev/reroot/ directory, * used by reroot code in init(8) as a mountpoint for tmpfs. 
*/ static void reroot_conf(void *unused) { int error; struct cdev *cdev; error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); if (error != 0) { printf("%s: failed to create device node, error %d", __func__, error); } } SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); /* * The system call that results in a reboot. */ /* ARGSUSED */ int sys_reboot(struct thread *td, struct reboot_args *uap) { int error; error = 0; #ifdef MAC error = mac_system_check_reboot(td->td_ucred, uap->opt); #endif if (error == 0) error = priv_check(td, PRIV_REBOOT); if (error == 0) { if (uap->opt & RB_REROOT) { error = kern_reroot(); } else { mtx_lock(&Giant); kern_reboot(uap->opt); mtx_unlock(&Giant); } } return (error); } /* * Called by events that want to shut down.. e.g on a PC */ void shutdown_nice(int howto) { if (initproc != NULL) { /* Send a signal to init(8) and have it shutdown the world. */ PROC_LOCK(initproc); if (howto & RB_POWEROFF) kern_psignal(initproc, SIGUSR2); else if (howto & RB_POWERCYCLE) kern_psignal(initproc, SIGWINCH); else if (howto & RB_HALT) kern_psignal(initproc, SIGUSR1); else kern_psignal(initproc, SIGINT); PROC_UNLOCK(initproc); } else { /* No init(8) running, so simply reboot. 
*/ kern_reboot(howto | RB_NOSYNC); } } static void print_uptime(void) { int f; struct timespec ts; getnanouptime(&ts); printf("Uptime: "); f = 0; if (ts.tv_sec >= 86400) { printf("%ldd", (long)ts.tv_sec / 86400); ts.tv_sec %= 86400; f = 1; } if (f || ts.tv_sec >= 3600) { printf("%ldh", (long)ts.tv_sec / 3600); ts.tv_sec %= 3600; f = 1; } if (f || ts.tv_sec >= 60) { printf("%ldm", (long)ts.tv_sec / 60); ts.tv_sec %= 60; f = 1; } printf("%lds\n", (long)ts.tv_sec); } int doadump(boolean_t textdump) { boolean_t coredump; int error; error = 0; if (dumping) return (EBUSY); if (dumper.dumper == NULL) return (ENXIO); savectx(&dumppcb); dumptid = curthread->td_tid; dumping++; coredump = TRUE; #ifdef DDB if (textdump && textdump_pending) { coredump = FALSE; textdump_dumpsys(&dumper); } #endif if (coredump) error = dumpsys(&dumper); dumping--; return (error); } /* * Shutdown the system cleanly to prepare for reboot, halt, or power off. */ void kern_reboot(int howto) { static int once = 0; #if defined(SMP) /* * Bind us to CPU 0 so that all shutdown code runs there. Some * systems don't shutdown properly (i.e., ACPI power off) if we * run on another processor. */ if (!SCHEDULER_STOPPED()) { thread_lock(curthread); sched_bind(curthread, 0); thread_unlock(curthread); KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0")); } #endif /* We're in the process of rebooting. */ rebooting = 1; /* We are out of the debugger now. */ kdb_active = 0; /* * Do any callouts that should be done BEFORE syncing the filesystems. */ EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); /* * Now sync filesystems */ if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) { once = 1; bufshutdown(show_busybufs); } print_uptime(); cngrab(); /* * Ok, now do things that assume all filesystem activity has * been completed. */ EVENTHANDLER_INVOKE(shutdown_post_sync, howto); if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) doadump(TRUE); /* Now that we're going to really halt the system... 
*/ EVENTHANDLER_INVOKE(shutdown_final, howto); for(;;) ; /* safety against shutdown_reset not working */ /* NOTREACHED */ } /* * The system call that results in changing the rootfs. */ static int kern_reroot(void) { struct vnode *oldrootvnode, *vp; struct mount *mp, *devmp; int error; if (curproc != initproc) return (EPERM); /* * Mark the filesystem containing currently-running executable * (the temporary copy of init(8)) busy. */ vp = curproc->p_textvp; error = vn_lock(vp, LK_SHARED); if (error != 0) return (error); mp = vp->v_mount; error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) { vfs_ref(mp); VOP_UNLOCK(vp, 0); error = vfs_busy(mp, 0); vn_lock(vp, LK_SHARED | LK_RETRY); vfs_rel(mp); if (error != 0) { VOP_UNLOCK(vp, 0); return (ENOENT); } if (vp->v_iflag & VI_DOOMED) { VOP_UNLOCK(vp, 0); vfs_unbusy(mp); return (ENOENT); } } VOP_UNLOCK(vp, 0); /* * Remove the filesystem containing currently-running executable * from the mount list, to prevent it from being unmounted * by vfs_unmountall(), and to avoid confusing vfs_mountroot(). * * Also preserve /dev - forcibly unmounting it could cause driver * reinitialization. */ vfs_ref(rootdevmp); devmp = rootdevmp; rootdevmp = NULL; mtx_lock(&mountlist_mtx); TAILQ_REMOVE(&mountlist, mp, mnt_list); TAILQ_REMOVE(&mountlist, devmp, mnt_list); mtx_unlock(&mountlist_mtx); oldrootvnode = rootvnode; /* * Unmount everything except for the two filesystems preserved above. */ vfs_unmountall(); /* * Add /dev back; vfs_mountroot() will move it into its new place. */ mtx_lock(&mountlist_mtx); TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list); mtx_unlock(&mountlist_mtx); rootdevmp = devmp; vfs_rel(rootdevmp); /* * Mount the new rootfs. */ vfs_mountroot(); /* * Update all references to the old rootvnode. */ mountcheckdirs(oldrootvnode, rootvnode); /* * Add the temporary filesystem back and unbusy it. 
*/ mtx_lock(&mountlist_mtx); TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); vfs_unbusy(mp); return (0); } /* * If the shutdown was a clean halt, behave accordingly. */ static void shutdown_halt(void *junk, int howto) { if (howto & RB_HALT) { printf("\n"); printf("The operating system has halted.\n"); printf("Please press any key to reboot.\n\n"); switch (cngetc()) { case -1: /* No console, just die */ cpu_halt(); /* NOTREACHED */ default: howto &= ~RB_HALT; break; } } } /* * Check to see if the system paniced, pause and then reboot * according to the specified delay. */ static void shutdown_panic(void *junk, int howto) { int loop; if (howto & RB_DUMP) { if (panic_reboot_wait_time != 0) { if (panic_reboot_wait_time != -1) { printf("Automatic reboot in %d seconds - " "press a key on the console to abort\n", panic_reboot_wait_time); for (loop = panic_reboot_wait_time * 10; loop > 0; --loop) { DELAY(1000 * 100); /* 1/10th second */ /* Did user type a key? */ if (cncheckc() != -1) break; } if (!loop) return; } } else { /* zero time specified - reboot NOW */ return; } printf("--> Press a key on the console to reboot,\n"); printf("--> or switch off the system now.\n"); cngetc(); } } /* * Everything done, now reset */ static void shutdown_reset(void *junk, int howto) { printf("Rebooting...\n"); DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ /* * Acquiring smp_ipi_mtx here has a double effect: * - it disables interrupts avoiding CPU0 preemption * by fast handlers (thus deadlocking against other CPUs) * - it avoids deadlocks against smp_rendezvous() or, more * generally, threads busy-waiting, with this spinlock held, * and waiting for responses by threads on other CPUs * (ie. smp_tlb_shootdown()). * * For the !SMP case it just needs to handle the former problem. 
*/ #ifdef SMP mtx_lock_spin(&smp_ipi_mtx); #else spinlock_enter(); #endif /* cpu_boot(howto); */ /* doesn't do anything at the moment */ cpu_reset(); /* NOTREACHED */ /* assuming reset worked */ } #if defined(WITNESS) || defined(INVARIANT_SUPPORT) static int kassert_warn_only = 0; #ifdef KDB static int kassert_do_kdb = 0; #endif #ifdef KTR static int kassert_do_ktr = 0; #endif static int kassert_do_log = 1; static int kassert_log_pps_limit = 4; static int kassert_log_mute_at = 0; static int kassert_log_panic_at = 0; static int kassert_warnings = 0; SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW, NULL, "kassert options"); SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, CTLFLAG_RWTUN, &kassert_warn_only, 0, "KASSERT triggers a panic (1) or just a warning (0)"); #ifdef KDB SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, CTLFLAG_RWTUN, &kassert_do_kdb, 0, "KASSERT will enter the debugger"); #endif #ifdef KTR SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, CTLFLAG_RWTUN, &kassert_do_ktr, 0, "KASSERT does a KTR, set this to the KTRMASK you want"); #endif SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, CTLFLAG_RWTUN, &kassert_do_log, 0, "KASSERT triggers a panic (1) or just a warning (0)"); SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RWTUN, &kassert_warnings, 0, "number of KASSERTs that have been triggered"); SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, CTLFLAG_RWTUN, &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic"); SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, CTLFLAG_RWTUN, &kassert_log_pps_limit, 0, "limit number of log messages per second"); SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, CTLFLAG_RWTUN, &kassert_log_mute_at, 0, "max number of KASSERTS to log"); static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0, kassert_sysctl_kassert, "I", "set to trigger a test kassert"); static int 
kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS)
{
	/*
	 * Handler for the debug.kassert.kassert sysctl.  A read, or any
	 * error, returns early; a successful write deliberately trips
	 * KASSERT(0) with the written integer in the message.
	 */
	int error, i;

	error = sysctl_wire_old_buffer(req, sizeof(int));
	if (error == 0) {
		i = 0;
		error = sysctl_handle_int(oidp, &i, 0, req);
	}
	/* req->newptr == NULL means no new value was supplied. */
	if (error != 0 || req->newptr == NULL)
		return (error);
	KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i));
	return (0);
}

/*
 * Called by KASSERT, this decides if we will panic
 * or if we will log via printf and/or ktr.
 *
 * The message is always formatted into a static buffer first.  If the
 * function does not panic it may, depending on the kassert_* knobs, emit a
 * KTR record, print a rate-limited console message with a backtrace, and
 * enter the debugger; it then increments kassert_warnings.
 */
void
kassert_panic(const char *fmt, ...)
{
	static char buf[256];
	va_list ap;

	va_start(ap, fmt);
	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	/*
	 * panic if we're not just warning, or if we've exceeded
	 * kassert_log_panic_at warnings.
	 */
	if (!kassert_warn_only ||
	    (kassert_log_panic_at > 0 &&
	     kassert_warnings >= kassert_log_panic_at)) {
		/* Restart the argument list; the first pass consumed it. */
		va_start(ap, fmt);
		vpanic(fmt, ap);
		/* NORETURN */
	}
#ifdef KTR
	if (kassert_do_ktr)
		CTR0(ktr_mask, buf);
#endif /* KTR */
	/*
	 * log if we've not yet met the mute limit.
	 */
	if (kassert_do_log &&
	    (kassert_log_mute_at == 0 ||
	     kassert_warnings < kassert_log_mute_at)) {
		static  struct timeval lasterr;
		static  int curerr;

		if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) {
			printf("KASSERT failed: %s\n", buf);
			kdb_backtrace();
		}
	}
#ifdef KDB
	if (kassert_do_kdb) {
		kdb_enter(KDB_WHY_KASSERT, buf);
	}
#endif
	atomic_add_int(&kassert_warnings, 1);
}
#endif

/*
 * Panic is called on unresolvable fatal errors.  It prints "panic: mesg",
 * and then reboots.  If we are called twice, then we avoid trying to sync
 * the disks as this often leads to recursive panics.
 *
 * This is the variadic front end; all of the work is done by vpanic().
 */
void
panic(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vpanic(fmt, ap);
}

/*
 * Panic with an already-started argument list.
 *
 * A first panic (panicstr still NULL) requests a dump (RB_DUMP), formats
 * the message into a static buffer and points panicstr at it.  A nested
 * panic requests RB_NOSYNC instead and prints its message directly.
 *
 * Before handing over to kern_reboot() the reboot options are adjusted
 * from the sysctl knobs: RB_NOSYNC unless sync_on_panic is set,
 * RB_POWEROFF if poweroff_on_panic is set, and RB_POWERCYCLE if
 * powercycle_on_panic is set.  The last two are independent tests, so both
 * bits are passed on when both knobs are enabled; nothing here arbitrates
 * between them.
 */
void
vpanic(const char *fmt, va_list ap)
{
#ifdef SMP
	cpuset_t other_cpus;
#endif
	struct thread *td = curthread;
	int bootopt, newpanic;
	static char buf[256];

	spinlock_enter();

#ifdef SMP
	/*
	 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from
	 * concurrently entering panic.  Only the winner will proceed
	 * further.
	 */
	if (panicstr == NULL && !kdb_active) {
		other_cpus = all_cpus;
		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
		stop_cpus_hard(other_cpus);
	}
#endif

	/*
	 * Ensure that the scheduler is stopped while panicking, even if panic
	 * has been entered from kdb.
	 */
	td->td_stopsched = 1;

	bootopt = RB_AUTOBOOT;
	newpanic = 0;
	if (panicstr)
		bootopt |= RB_NOSYNC;
	else {
		bootopt |= RB_DUMP;
		panicstr = fmt;
		newpanic = 1;
	}

	if (newpanic) {
		/* Replace the raw format with the fully formatted message. */
		(void)vsnprintf(buf, sizeof(buf), fmt, ap);
		panicstr = buf;
		cngrab();
		printf("panic: %s\n", buf);
	} else {
		printf("panic: ");
		vprintf(fmt, ap);
		printf("\n");
	}
#ifdef SMP
	printf("cpuid = %d\n", PCPU_GET(cpuid));
#endif
	printf("time = %jd\n", (intmax_t )time_second);
#ifdef KDB
	if (newpanic && trace_on_panic)
		kdb_backtrace();
	if (debugger_on_panic)
		kdb_enter(KDB_WHY_PANIC, "panic");
#endif
	/*thread_lock(td); */
	td->td_flags |= TDF_INPANIC;
	/* thread_unlock(td); */
	if (!sync_on_panic)
		bootopt |= RB_NOSYNC;
+	if (poweroff_on_panic)
+		bootopt |= RB_POWEROFF;
+	if (powercycle_on_panic)
+		bootopt |= RB_POWERCYCLE;
	kern_reboot(bootopt);
}

/*
 * Support for poweroff delay.
 *
 * Please note that setting this delay too short might power off your machine
 * before the write cache on your hard disk has been flushed, leading to
 * soft-updates inconsistencies.
 */
#ifndef POWEROFF_DELAY
# define POWEROFF_DELAY 5000
#endif
static int poweroff_delay = POWEROFF_DELAY;

SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
    &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)");

/*
 * shutdown_final handler (registered at SHUTDOWN_PRI_FIRST by
 * shutdown_conf()).  Does nothing unless the request is a power off or a
 * power cycle and poweroff_delay is positive; otherwise busy-waits for
 * poweroff_delay milliseconds.
 */
static void
poweroff_wait(void *junk, int howto)
{

	if ((howto & (RB_POWEROFF | RB_POWERCYCLE)) == 0 || poweroff_delay <= 0)
		return;
	/* msec -> usec; shutdown_reset() uses DELAY(1000000) for one second. */
	DELAY(poweroff_delay * 1000);
}

/*
 * Some system processes (e.g. syncer) need to be stopped at appropriate
 * points in their main loops prior to a system shutdown, so that they
 * won't interfere with the shutdown process (e.g. by holding a disk buf
 * to cause sync to fail).
For each of these system processes, register * shutdown_kproc() as a handler for one of shutdown events. */ static int kproc_shutdown_wait = 60; SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process"); void kproc_shutdown(void *arg, int howto) { struct proc *p; int error; if (panicstr) return; p = (struct proc *)arg; printf("Waiting (max %d seconds) for system process `%s' to stop... ", kproc_shutdown_wait, p->p_comm); error = kproc_suspend(p, kproc_shutdown_wait * hz); if (error == EWOULDBLOCK) printf("timed out\n"); else printf("done\n"); } void kthread_shutdown(void *arg, int howto) { struct thread *td; int error; if (panicstr) return; td = (struct thread *)arg; printf("Waiting (max %d seconds) for system thread `%s' to stop... ", kproc_shutdown_wait, td->td_name); error = kthread_suspend(td, kproc_shutdown_wait * hz); if (error == EWOULDBLOCK) printf("timed out\n"); else printf("done\n"); } static char dumpdevname[sizeof(((struct cdev*)NULL)->si_name)]; SYSCTL_STRING(_kern_shutdown, OID_AUTO, dumpdevname, CTLFLAG_RD, dumpdevname, 0, "Device for kernel dumps"); static int _dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, size_t length); #ifdef EKCD static struct kerneldumpcrypto * kerneldumpcrypto_create(size_t blocksize, uint8_t encryption, const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey) { struct kerneldumpcrypto *kdc; struct kerneldumpkey *kdk; uint32_t dumpkeysize; dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize); kdc = malloc(sizeof(*kdc) + dumpkeysize, M_EKCD, M_WAITOK | M_ZERO); arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0); kdc->kdc_encryption = encryption; switch (kdc->kdc_encryption) { case KERNELDUMP_ENC_AES_256_CBC: if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0) goto failed; break; default: goto failed; } kdc->kdc_dumpkeysize = dumpkeysize; kdk = kdc->kdc_dumpkey; 
kdk->kdk_encryption = kdc->kdc_encryption; memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); kdk->kdk_encryptedkeysize = htod32(encryptedkeysize); memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize); return (kdc); failed: explicit_bzero(kdc, sizeof(*kdc) + dumpkeysize); free(kdc, M_EKCD); return (NULL); } static int kerneldumpcrypto_init(struct kerneldumpcrypto *kdc) { uint8_t hash[SHA256_DIGEST_LENGTH]; SHA256_CTX ctx; struct kerneldumpkey *kdk; int error; error = 0; if (kdc == NULL) return (0); /* * When a user enters ddb it can write a crash dump multiple times. * Each time it should be encrypted using a different IV. */ SHA256_Init(&ctx); SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv)); SHA256_Final(hash, &ctx); bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv)); switch (kdc->kdc_encryption) { case KERNELDUMP_ENC_AES_256_CBC: if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, kdc->kdc_iv) <= 0) { error = EINVAL; goto out; } break; default: error = EINVAL; goto out; } kdk = kdc->kdc_dumpkey; memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); out: explicit_bzero(hash, sizeof(hash)); return (error); } static uint32_t kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc) { if (kdc == NULL) return (0); return (kdc->kdc_dumpkeysize); } #endif /* EKCD */ #ifdef GZIO static struct kerneldumpgz * kerneldumpgz_create(struct dumperinfo *di, uint8_t compression) { struct kerneldumpgz *kdgz; if (compression != KERNELDUMP_COMP_GZIP) return (NULL); kdgz = malloc(sizeof(*kdgz), M_DUMPER, M_WAITOK | M_ZERO); kdgz->kdgz_stream = gzio_init(kerneldumpgz_write_cb, GZIO_DEFLATE, di->maxiosize, kerneldump_gzlevel, di); if (kdgz->kdgz_stream == NULL) { free(kdgz, M_DUMPER); return (NULL); } kdgz->kdgz_buf = malloc(di->maxiosize, M_DUMPER, M_WAITOK | M_NODUMP); return (kdgz); } static void kerneldumpgz_destroy(struct dumperinfo *di) { struct kerneldumpgz *kdgz; kdgz = di->kdgz; if (kdgz == NULL) return; gzio_fini(kdgz->kdgz_stream); 
explicit_bzero(kdgz->kdgz_buf, di->maxiosize); free(kdgz->kdgz_buf, M_DUMPER); free(kdgz, M_DUMPER); } #endif /* GZIO */ /* Registration of dumpers */ int set_dumper(struct dumperinfo *di, const char *devname, struct thread *td, uint8_t compression, uint8_t encryption, const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey) { size_t wantcopy; int error; error = priv_check(td, PRIV_SETDUMPER); if (error != 0) return (error); if (di == NULL) { error = 0; goto cleanup; } if (dumper.dumper != NULL) return (EBUSY); dumper = *di; dumper.blockbuf = NULL; dumper.kdc = NULL; dumper.kdgz = NULL; if (encryption != KERNELDUMP_ENC_NONE) { #ifdef EKCD dumper.kdc = kerneldumpcrypto_create(di->blocksize, encryption, key, encryptedkeysize, encryptedkey); if (dumper.kdc == NULL) { error = EINVAL; goto cleanup; } #else error = EOPNOTSUPP; goto cleanup; #endif } wantcopy = strlcpy(dumpdevname, devname, sizeof(dumpdevname)); if (wantcopy >= sizeof(dumpdevname)) { printf("set_dumper: device name truncated from '%s' -> '%s'\n", devname, dumpdevname); } if (compression != KERNELDUMP_COMP_NONE) { #ifdef GZIO /* * We currently can't support simultaneous encryption and * compression. 
*/ if (encryption != KERNELDUMP_ENC_NONE) { error = EOPNOTSUPP; goto cleanup; } dumper.kdgz = kerneldumpgz_create(&dumper, compression); if (dumper.kdgz == NULL) { error = EINVAL; goto cleanup; } #else error = EOPNOTSUPP; goto cleanup; #endif } dumper.blockbuf = malloc(di->blocksize, M_DUMPER, M_WAITOK | M_ZERO); return (0); cleanup: #ifdef EKCD if (dumper.kdc != NULL) { explicit_bzero(dumper.kdc, sizeof(*dumper.kdc) + dumper.kdc->kdc_dumpkeysize); free(dumper.kdc, M_EKCD); } #endif #ifdef GZIO kerneldumpgz_destroy(&dumper); #endif if (dumper.blockbuf != NULL) { explicit_bzero(dumper.blockbuf, dumper.blocksize); free(dumper.blockbuf, M_DUMPER); } explicit_bzero(&dumper, sizeof(dumper)); dumpdevname[0] = '\0'; return (error); } static int dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length) { if (length != 0 && (offset < di->mediaoffset || offset - di->mediaoffset + length > di->mediasize)) { printf("Attempt to write outside dump device boundaries.\n" "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", (intmax_t)offset, (intmax_t)di->mediaoffset, (uintmax_t)length, (intmax_t)di->mediasize); return (ENOSPC); } if (length % di->blocksize != 0) { printf("Attempt to write partial block of length %ju.\n", (uintmax_t)length); return (EINVAL); } if (offset % di->blocksize != 0) { printf("Attempt to write at unaligned offset %jd.\n", (intmax_t)offset); return (EINVAL); } return (0); } #ifdef EKCD static int dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size) { switch (kdc->kdc_encryption) { case KERNELDUMP_ENC_AES_256_CBC: if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf, 8 * size, buf) <= 0) { return (EIO); } if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, buf + size - 16 /* IV size for AES-256-CBC */) <= 0) { return (EIO); } break; default: return (EINVAL); } return (0); } /* Encrypt data and call dumper. 
*/ static int dump_encrypted_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, off_t offset, size_t length) { static uint8_t buf[KERNELDUMP_BUFFER_SIZE]; struct kerneldumpcrypto *kdc; int error; size_t nbytes; kdc = di->kdc; while (length > 0) { nbytes = MIN(length, sizeof(buf)); bcopy(virtual, buf, nbytes); if (dump_encrypt(kdc, buf, nbytes) != 0) return (EIO); error = dump_write(di, buf, physical, offset, nbytes); if (error != 0) return (error); offset += nbytes; virtual = (void *)((uint8_t *)virtual + nbytes); length -= nbytes; } return (0); } static int dump_write_key(struct dumperinfo *di, off_t offset) { struct kerneldumpcrypto *kdc; kdc = di->kdc; if (kdc == NULL) return (0); return (dump_write(di, kdc->kdc_dumpkey, 0, offset, kdc->kdc_dumpkeysize)); } #endif /* EKCD */ #ifdef GZIO static int kerneldumpgz_write_cb(void *base, size_t length, off_t offset, void *arg) { struct dumperinfo *di; size_t resid, rlength; int error; di = arg; if (length % di->blocksize != 0) { /* * This must be the final write after flushing the compression * stream. Write as many full blocks as possible and stash the * residual data in the dumper's block buffer. It will be * padded and written in dump_finish(). */ rlength = rounddown(length, di->blocksize); if (rlength != 0) { error = _dump_append(di, base, 0, rlength); if (error != 0) return (error); } resid = length - rlength; memmove(di->blockbuf, (uint8_t *)base + rlength, resid); di->kdgz->kdgz_resid = resid; return (EAGAIN); } return (_dump_append(di, base, 0, length)); } #endif /* GZIO */ /* * Write a kerneldumpheader at the specified offset. The header structure is 512 * bytes in size, but we must pad to the device sector size. 
*/
/*
 * Returns ENOMEM when the header is larger than one device block.  When it
 * is smaller, the header is staged in di->blockbuf, which is zeroed first
 * so the padding is well defined.
 */
static int
dump_write_header(struct dumperinfo *di, struct kerneldumpheader *kdh,
    off_t offset)
{
	void *buf;
	size_t hdrsz;

	hdrsz = sizeof(*kdh);
	if (hdrsz > di->blocksize)
		return (ENOMEM);

	/* An exact fit can be written straight from the caller's header. */
	if (hdrsz == di->blocksize)
		buf = kdh;
	else {
		buf = di->blockbuf;
		memset(buf, 0, di->blocksize);
		memcpy(buf, kdh, hdrsz);
	}

	return (dump_write(di, buf, 0, offset, di->blocksize));
}

/*
 * Don't touch the first SIZEOF_METADATA bytes on the dump device.  This is to
 * protect us from metadata and metadata from us.
 */
#define	SIZEOF_METADATA		(64 * 1024)

/*
 * Do some preliminary setup for a kernel dump: initialize state for encryption,
 * if requested, and make sure that we have enough space on the dump device.
 *
 * We set things up so that the dump ends before the last sector of the dump
 * device, at which the trailing header is written.
 *
 *     +-----------+------+-----+----------------------------+------+
 *     |           | lhdr | key |    ... kernel dump ...     | thdr |
 *     +-----------+------+-----+----------------------------+------+
 *                   1 blk  opt <------- dump extent --------> 1 blk
 *
 * Dumps written using dump_append() start at the beginning of the extent.
 * Uncompressed dumps will use the entire extent, but compressed dumps typically
 * will not.  The true length of the dump is recorded in the leading and trailing
 * headers once the dump has been completed.
 *
 * Returns 0 on success, the error from kerneldumpcrypto_init() (EKCD
 * kernels only), or E2BIG when the device is too small and no compression
 * is configured.  On success di->dumpoff is the device offset of the first
 * dump byte.
 */
int
dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh)
{
	uint64_t dumpextent;
	uint32_t keysize;

#ifdef EKCD
	int error = kerneldumpcrypto_init(di->kdc);
	if (error != 0)
		return (error);
	keysize = kerneldumpcrypto_dumpkeysize(di->kdc);
#else
	keysize = 0;
#endif

	/* Required space: reserved metadata + extent + two headers + key. */
	dumpextent = dtoh64(kdh->dumpextent);
	if (di->mediasize < SIZEOF_METADATA + dumpextent + 2 * di->blocksize +
	    keysize) {
#ifdef GZIO
		if (di->kdgz != NULL) {
			/*
			 * We don't yet know how much space the compressed dump
			 * will occupy, so try to use the whole swap partition
			 * (minus the first 64KB) in the hope that the
			 * compressed dump will fit.  If that doesn't turn out to
			 * be enough, the bounds checking in dump_write()
			 * will catch us and cause the dump to fail.
			 */
			dumpextent = di->mediasize - SIZEOF_METADATA -
			    2 * di->blocksize - keysize;
			kdh->dumpextent = htod64(dumpextent);
		} else
#endif
			/* Also the body of the "else" above when GZIO is set. */
			return (E2BIG);
	}

	/* The offset at which to begin writing the dump. */
	di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize -
	    dumpextent;

	return (0);
}

/*
 * Write `length' bytes at the current dump offset, through the encrypting
 * path when the dumper has encryption state, and advance di->dumpoff only
 * if the write succeeded.
 */
static int
_dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical,
    size_t length)
{
	int error;

#ifdef EKCD
	if (di->kdc != NULL)
		error = dump_encrypted_write(di, virtual, physical, di->dumpoff,
		    length);
	else
#endif
		error = dump_write(di, virtual, physical, di->dumpoff, length);
	if (error == 0)
		di->dumpoff += length;
	return (error);
}

/*
 * Write to the dump device starting at dumpoff.  When compression is enabled,
 * writes to the device will be performed using a callback that gets invoked
 * when the compression stream's output buffer is full.
 *
 * With compression, a request larger than di->maxiosize is rejected with
 * EINVAL because the data is first copied into the maxiosize-sized
 * kdgz_buf.
 */
int
dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical,
    size_t length)
{
#ifdef GZIO
	void *buf;

	if (di->kdgz != NULL) {
		/* Bounce through a buffer to avoid gzip CRC errors. */
		if (length > di->maxiosize)
			return (EINVAL);
		buf = di->kdgz->kdgz_buf;
		memmove(buf, virtual, length);
		return (gzio_write(di->kdgz->kdgz_stream, buf, length));
	}
#endif
	return (_dump_append(di, virtual, physical, length));
}

/*
 * Write to the dump device at the specified offset.
 *
 * The request is validated by dump_check_bounds() before it is passed to
 * the driver's dumper routine; a validation error is returned unchanged.
 */
int
dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical,
    off_t offset, size_t length)
{
	int error;

	error = dump_check_bounds(di, offset, length);
	if (error != 0)
		return (error);
	return (di->dumper(di->priv, virtual, physical, offset, length));
}

/*
 * Perform kernel dump finalization: flush the compression stream, if necessary,
 * write the leading and trailing kernel dump headers now that we know the true
 * length of the dump, and optionally write the encryption key following the
 * leading header.
*/
/*
 * Finish a kernel dump on the dump device.
 *
 * When a compression stream is attached (GZIO and di->kdgz != NULL) it is
 * flushed first and the header's dumplength and parity are recomputed from
 * the final write offset.  The header is then written at both ends of the
 * dump extent and, under EKCD, the key is written after the leading header.
 *
 * Returns 0 on success, otherwise the first non-zero error produced by
 * gzio_flush(), dump_write(), dump_write_header() or dump_write_key().
 */
int
dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh)
{
	uint64_t extent;
	uint32_t keysize;
	int error;

	/*
	 * dump_init_header() stores dumpextent through htod64(); convert it
	 * back for the offset arithmetic below.
	 */
	extent = dtoh64(kdh->dumpextent);
#ifdef EKCD
	keysize = kerneldumpcrypto_dumpkeysize(di->kdc);
#else
	/* No key is written without EKCD, so it takes no room in the layout. */
	keysize = 0;
#endif

#ifdef GZIO
	if (di->kdgz != NULL) {
		error = gzio_flush(di->kdgz->kdgz_stream);
		if (error == EAGAIN) {
			/* We have residual data in di->blockbuf. */
			error = dump_write(di, di->blockbuf, 0, di->dumpoff,
			    di->blocksize);
			/*
			 * A full block is written, but the offset advances
			 * only by the residual byte count.
			 */
			di->dumpoff += di->kdgz->kdgz_resid;
			di->kdgz->kdgz_resid = 0;
		}
		/* Covers both a gzio_flush() and a dump_write() failure. */
		if (error != 0)
			return (error);

		/*
		 * We now know the size of the compressed dump, so update the
		 * header accordingly and recompute parity.
		 *
		 * NOTE(review): the subtracted offset is presumably where the
		 * dump data begins (the key write below ends there) -- confirm.
		 */
		kdh->dumplength = htod64(di->dumpoff -
		    (di->mediaoffset + di->mediasize - di->blocksize - extent));
		/* Zero the old parity before it is recomputed. */
		kdh->parity = 0;
		kdh->parity = kerneldump_parity(kdh);

		gzio_reset(di->kdgz->kdgz_stream);
	}
#endif

	/*
	 * Write kerneldump headers at the beginning and end of the dump extent.
	 * Write the key after the leading header.
	 */
	error = dump_write_header(di, kdh,
	    di->mediaoffset + di->mediasize - 2 * di->blocksize - extent -
	    keysize);
	if (error != 0)
		return (error);

#ifdef EKCD
	error = dump_write_key(di,
	    di->mediaoffset + di->mediasize - di->blocksize - extent - keysize);
	if (error != 0)
		return (error);
#endif

	/* The trailing header occupies the last block of the media. */
	error = dump_write_header(di, kdh,
	    di->mediaoffset + di->mediasize - di->blocksize);
	if (error != 0)
		return (error);

	/*
	 * NOTE(review): presumably a NULL, zero-length write tells the dumper
	 * that the dump is complete -- confirm.  Its result is deliberately
	 * ignored.
	 */
	(void)dump_write(di, NULL, 0, 0, 0);
	return (0);
}

/*
 * Initialise the kernel dump header *kdh for the dumper di.
 *
 * The header is zeroed, then filled with the magic, architecture, header
 * version, architecture version, dump length and extent, dump time, key
 * size, block size, hostname, kernel version string and (when set) the
 * panic string.  Integer fields are stored through htod32()/htod64().
 * The parity is computed last, over the completed header.
 *
 * dumpextent starts out equal to dumplength (both derived from dumplen).
 */
void
dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh,
    char *magic, uint32_t archver, uint64_t dumplen)
{
	size_t dstsize;

	bzero(kdh, sizeof(*kdh));
	strlcpy(kdh->magic, magic, sizeof(kdh->magic));
	strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
	kdh->version = htod32(KERNELDUMPVERSION);
	kdh->architectureversion = htod32(archver);
	kdh->dumplength = htod64(dumplen);
	kdh->dumpextent = kdh->dumplength;
	kdh->dumptime = htod64(time_second);
#ifdef EKCD
	kdh->dumpkeysize = htod32(kerneldumpcrypto_dumpkeysize(di->kdc));
#else
	kdh->dumpkeysize = 0;
#endif
	kdh->blocksize = htod32(di->blocksize);
	strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname));
	dstsize = sizeof(kdh->versionstring);
	/*
	 * strlcpy() returns the length of the source, so a result >= dstsize
	 * means the version string was truncated.  In that case make the last
	 * character before the terminating NUL a newline.
	 */
	if (strlcpy(kdh->versionstring, version, dstsize) >= dstsize)
		kdh->versionstring[dstsize - 2] = '\n';
	if (panicstr != NULL)
		strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
#ifdef GZIO
	/* Otherwise the field keeps the zero left by bzero() above. */
	if (di->kdgz != NULL)
		kdh->compression = KERNELDUMP_COMP_GZIP;
#endif
	/* Must come last: every other field has to be final by now. */
	kdh->parity = kerneldump_parity(kdh);
}

#ifdef DDB
/*
 * DDB show command "panic": print the current panic string, or a notice
 * when panicstr is NULL.
 */
DB_SHOW_COMMAND(panic, db_show_panic)
{

	if (panicstr == NULL)
		db_printf("panicstr not set\n");
	else
		db_printf("panic: %s\n", panicstr);
}
#endif
Index: head/tools/tools/sysdoc/tunables.mdoc =================================================================== --- head/tools/tools/sysdoc/tunables.mdoc (revision 325784) +++ head/tools/tools/sysdoc/tunables.mdoc (revision 325785) @@ -1,2362 +1,2376 @@ # $FreeBSD$ --- debug.disablecwd bool Determines whether or not the .Xr getwcd 3 system call should be allowed. --- debug.disablefullpath bool Determines whether or not the .Fn vn_fullpath function may be used. --- debug.dobkgrdwrite bool Determines if background writes should be performed. --- debug.hashstat.nchash struct Displays nchash chain lengths. This is a read-only variable. --- debug.hashstat.rawnchash --- debug.ieee80211 bool This .Nm allows you to enable or disable debugging for 802.11 devices. --- debug.kdb.available variable Used to retrieve a list of currently available debugger backends. --- debug.kdb.current variable Allows for the selection of the debugger backend which is used to handle debugger requests. --- debug.kdb.enter variable When written to, the system should break to the debugger. --- debug.malloc.failure_count bool Number of times a coerced malloc failure has occurred as a result of .Va debug.malloc.failure_rate . Useful for tracking what might have happened and whether failures are being generated.
--- debug.malloc.failure_rate bool Debugging feature causing .Dv M_NOWAIT allocations to fail at a specified rate. How often to generate a failure: if set to 0 (default), this feature is disabled. In other words if set to 10 (one in ten .Xr malloc 3 calls will fail). --- debug.rman_debug bool This .Nm allows you to enable or disable debugging for .Xr rman 9 , the .Fx resource manager. --- debug.sizeof.bio --- debug.sizeof.buf --- debug.sizeof.cdev --- debug.sizeof.devstat --- debug.sizeof.kinfo_proc --- debug.sizeof.proc --- debug.sizeof.vnode --- debug.vnlru_nowhere --- hw.acpi.cpu.current_speed bool Display the current CPU speed. This is adjustable, but doing so is not recommended. --- hw.acpi.cpu.max_speed int Allows you to change the stepping for processor speed on machines which support .Xr acpi 4 . --- hw.acpi.disable_on_poweroff bool Some systems using .Xr acpi 4 have problems powering off when shutting down with .Xr acpi 4 enabled. This .Nm disables .Xr acpi 4 when rebooting and shutting down. --- hw.acpi.s4bios bool This .Nm determines whether or not the S4BIOS sleep implementation should be used. --- hw.acpi.sleep_delay int Set the sleep delay for .Xr acpi 4 . --- hw.acpi.supported_sleep_state bool List supported .Tn ACPI sleep states --- hw.acpi.thermal.min_runtime --- hw.acpi.thermal.polling_rate int The interval in seconds that should be used to check the current system temperature. --- hw.acpi.thermal.tz0.temperature str Displays the current temperature. This is a read-only variable. --- hw.acpi.thermal.tz0.thermal_flags --- hw.acpi.verbose bool Determines whether or not .Xr acpi 4 should be verbose. --- hw.ata.ata_dma bool Allows the enabling and disabling of DMA for ATA devices. --- hw.ata.atapi_dma bool Allows the enabling and disabling of DMA for atapi devices, such as CD-ROM drives. --- hw.ata.tags bool An experimental feature for IDE hard drives which allows write caching to be turned on. 
Please read the .Xr tuning 7 manual page carefully before using this. --- hw.ata.wc bool Determines whether or not IDE write caching should be turned on or off. See .Xr tuning 7 for more information. --- hw.bus.devices --- hw.bus.info int This is an internally used function that returns the kernel bus interface version. --- hw.bus.rman --- hw.busdmafree_bpages --- hw.busdma.reserved_bpages --- hw.busdma.active_bpages --- hw.busdma.total_bpages --- hw.busdma.total_bounced --- hw.busdma.total_deferred --- hw.byteorder int Returns the system byte order. This is a read-only variable. --- hw.cardbus.cis_debug --- hw.cardbus.debug --- hw.cbb.debug --- hw.cbb.start_16_io --- hw.cbb.start_32_io --- hw.cbb.start_memory --- hw.floatingpoint bool Reports true if the machine has a floating point processor. This is a read-only variable. --- hw.fxp0.bundle_max int Controls the receive interrupt microcode bundle size limit for the .Xr fxp 4 device. --- hw.fxp0.int_delay int Controls the receive interrupt microcode bundling delay for the .Xr fxp 4 device. --- hw.fxp_noflow bool Disables flow control support on .Xr fxp 4 cards. When flow control is enabled, and if the operating system does not acknowledge the packet buffer filling, the card will begin to generate Ethernet quench packets, but appears to get into a feedback loop of some sort, hosing local switches. This is a workaround for this issue. --- hw.fxp_rnr int Set the amount of times that a no-resource condition may occur before the .Xr fxp 4 device may restart. --- hw.instruction_sse bool Returns true if SSE support is enabled in the kernel. This is a read-only variable. --- hw.intrcnt bool Displays a list of interrupt counters. This is a read-only variable. --- hw.intrnames str Displays a list of zero-terminated interrupt names. This is a read-only variable. --- hw.kbd.keymap_restrict_change bool This sysctl acts as a sort of secure-level, allowing control of the console keymap. 
Giving this a value of 1 means that only the root user can change restricted keys (like boot, panic...). A value of 2 means that only root can change restricted keys and regular keys. Regular users can still change accents and function keys. A value of 3 means only root can change restricted, regular and accent keys, while a value of 4 means that no changes to the keymap are allowed by anyone other than the root user. --- hw.machine str Displays the machine class. This is a read-only variable. --- hw.machine_arch str Displays the current architecture. This is a read-only variable. --- hw.model str Displays the model information of the currently running hardware. This is a read-only variable. --- hw.ncpu bool Report the number of CPUs in the system. This is a read-only variable. --- hw.pagesize int Displays the current .Xr pagesize 1 . This is a read-only variable. --- hw.pccard.cis_debug int Allows debugging to be turned on or off for CIS. --- hw.pccard.debug bool Determines whether or not to use debugging for the PC Card bus driver. --- hw.pci.allow_unsupported_io_range bool Some machines do not detect their CardBus slots correctly because they use unsupported I/O ranges. This .Nm allows FreeBSD to use those ranges. --- hw.pci.enable_io_modes --- hw.snd.pcm0.ac97rate --- hw.snd.verbose int Control the level of verbosity for the .Pa /dev/sndstat device. See the .Xr pcm 4 man page for more information on debug levels. --- hw.snd.report_soft_formats bool Controls the internal format conversion if it is available transparently to the application software. See .Xr pcm 4 for more information. --- hw.syscons.bell bool Allows you to control whether or not to use the 'bell' while using the console. This is turned on by default. --- hw.syscons.saver.keybonly bool This variable tells the system that the screen saver may only wake up if the keyboard is used.
This means that log messages that are pushed to the console will not cause the screen saver to stop, and the log message will not be displayed. This can be disabled to mimic the behavior of older syscons. --- hw.syscons.sc_no_suspend_vtswitch bool Disables switching between virtual terminals during suspend or resume. See .Xr syscons 4 for more information. --- hw.wi.debug bool Controls the level of debugging for .Xr wi 4 devices. --- hw.wi.txerate int This value controls the maximum number of error messages per second. Giving this .Nm a value of 0 (zero) disables error messages completely. --- kern.acct_chkfreq int Specifies the frequency (in minutes) with which free disk space should be checked. This is used in conjunction with .Va kern.acct_resume and .Va kern.acct_suspend . --- kern.acct_resume int The percentage of free disk space above which process accounting will resume. --- kern.acct_suspend int The percentage of free disk space below which process accounting stops. --- kern.argmax bool The maximum number of bytes that can be used in an argument to .Xr execve 2 . This is basically the maximum number of characters which can be used in a single command line. On some rare occasions, this value needs altering. If so, please check out the .Xr xargs 1 utility. --- kern.bootfile str The kernel which was used to boot the system. --- kern.boottime str The time at which the current kernel became active after the system booted. This is a read-only variable. --- kern.chroot_allow_open_directories bool Depending on the setting of this variable, open file descriptors which reference directories will fail. If set to .Em 0 , .Xr chroot 8 will always fail with .Er EPERM if there are any directories open. If set to .Em 1 (the default), .Xr chroot 8 will fail with .Er EPERM if there are any directories open and the process is already subject to the .Xr chroot 8 system call. Any other value will bypass the check for open directories.
Please see the .Xr chroot 2 man page for more information. --- kern.clockrate struct Displays information about the system clock. This is a read-only variable. --- kern.console --- kern.coredump bool Determines where the kernel should dump a core file in the event of a kernel panic. --- kern.corefile str Describes the file name that a core image should be stored to. See the .Xr core 5 man page for more information on this variable. --- kern.cp_time struct Contains CPU time statistics. This is a read-only variable. --- kern.devname struct An internally used .Nm that returns suitable device names for the .Fn devname function. See the .Xr devname 3 manual page for more information. --- kern.devstat.all struct An internally used .Nm that returns current devstat statistics as well as the current devstat generation number. See the .Xr devstat 3 man page for more information. --- kern.devstat.generation --- kern.devstat.numdevs --- kern.devstat.version int Displays the devstat list version number. This is a read-only variable. --- kern.disks str Display disk devices that the kernel is currently aware of. This is a read-only variable. --- kern.domainname str This shows the name of the current YP/NIS domain. --- kern.drainwait int The time to wait after dropping DTR to the given number. The units are measured in hundredths of a second. The default is 300 hundredths, i.e., 3 seconds. This option is needed mainly to set proper recover time after modem resets. --- kern.elf32.fallback_brand --- kern.fallback_elf_brand --- kern.file struct Returns the entire file structure. --- kern.function_list struct Returns all functions names in the kernel. --- kern.geom.confdot --- kern.geom.conftxt --- kern.geom.confxml --- kern.hostid int This .Nm may contain the IP address of the system. --- kern.hostname str Display the system hostname. This can be modified with the .Xr hostname 1 utility. --- kern.init_path string The path to search for the .Xr init 8 process. 
This is a read-only variable. --- kern.iov_max --- kern.ipc.clust_hiwm --- kern.ipc.clust_lowm --- kern.ipc.maxsockbuf int The maximum buffer size that may be allocated for sockets. See .Xr getsockopt 2 for more information. --- kern.ipc.maxsockets int The maximum number of sockets available. --- kern.ipc.mb_statpcpu --- kern.ipc.mbstat --- kern.ipc.mbuf_hiwm --- kern.ipc.mbuf_lowm --- kern.ipc.mbuf_wait --- kern.ipc.msqids --- kern.ipc.nmbclusters bool Maximum number of mbuf clusters available. The kernel uses a preallocated pool of .Dq mbuf clusters for the .Xr mbuf 9 allocator. The pool size is tuned by the kernel during boot. That size is set to a value which seems appropriate for the current system. --- kern.ipc.nmbcnt --- kern.ipc.nmbufs --- kern.ipc.nsfbufs --- kern.ipc.numopensockets --- kern.ipc.somaxconn int The maximum pending socket connection queue size. --- kern.ipc.zero_copy.receive bool When set to a non-zero value, zero copy is enabled for received packets. This reduces copying of data around for incoming packets and can significantly improve throughput for network connections. --- kern.ipc.zero_copy.send bool When set to a non-zero value, zero copy is enabled for sent packets. This reduces copying of data around for outgoing packets and can significantly improve throughput for network connections. --- kern.job_control bool Reports whether or not job control is available. This is a read-only variable. --- kern.kq_calloutmax --- kern.lastpid int Displays the last PID used by a process. This is a read-only variable. --- kern.logsigexit bool Tells the kernel whether or not to log fatal signal exits. --- kern.malloc str Displays how memory is currently being allocated. This is a read-only variable. --- kern.maxfiles int The maximum number of files allowed for all the processes of the running kernel. You can override the default value which the kernel calculates by explicitly setting this to a non-zero value.
Also see the .Xr tuning 7 man page for more information. --- kern.maxfilesperproc int The maximum number of files any one process can open. See the .Xr ps 1 utility for more information on monitoring processes. --- kern.maxproc int The maximum number of processes that the system can be running at any time. See the .Xr ps 1 utility for more information on monitoring processes. --- kern.maxprocperuid int The maximum number of processes one user ID can run. See the .Xr ps 1 utility for more information on monitoring processes. --- kern.maxusers int Controls the scaling of a number of static system tables, including defaults for the maximum number of open files, sizing of network memory resources, etc. See the .Xr tuning 7 man page for more information. This .Nm cannot be set using .Xr sysctl 8 . Use .Xr loader 8 instead to set this at boot time. --- kern.maxvnodes bool The maximum number of .Em vnodes (virtual file system nodes) the system can have open simultaneously. --- kern.minvnodes bool The minimum number of .Em vnodes (virtual file system nodes) the system can have open simultaneously. --- kern.module_path str This .Nm holds a colon-separated list of directories in which the kernel will search for loadable kernel modules. This path is searched when using commands such as .Xr kldload 8 and .Xr kldunload 8 . --- kern.msgbuf string Contains the kernel message buffer. --- kern.msgbuf_clear bool Giving this .Nm a value of 1 (one) will cause the kernel message buffer to be cleared. It should be noted though, that the .Nm will then automatically revert to its original value of 0 (zero). --- kern.ngroups int Contains the maximum number of groups that a user may belong to. This is a read-only variable. --- kern.openfiles int Shows the current number of system-wide open files. This is useful when used in conjunction with .Va kern.maxfiles for tuning your system. This is a read-only variable. --- kern.osreldate string Displays the kernel release date.
This is a read-only variable. --- kern.osrelease str Displays the current version of .Fx running. This is a read-only variable. --- kern.osrevision string Displays the operating system revision. This is a read-only variable. --- kern.ostype str Alter the name of the current operating system. Changing this will change the output from the .Xr uname 1 utility. Changing the default is not recommended. --- kern.posix1version string Returns the version of .Tn POSIX that the system is attempting to comply with. This is a read-only variable. --- +kern.powercycle_on_panic +bool + +In the event of a panic, this variable controls whether or not the +system should try to power cycle instead of rebooting. + +--- +kern.poweroff_on_panic +bool + +In the event of a panic, this variable controls whether or not the +system should try to power off instead of rebooting. + +--- kern.proc.all --- kern.proc.args int Allows a process to retrieve the argument list or process title for another process without looking in the address space of another program. This is a read-only variable. --- kern.proc.pgrp --- kern.proc.pid struct This internally used .Nm may be used to extract process information. See .Xr sysctl 3 for an example. --- kern.proc.ruid --- kern.proc.tty --- kern.proc.uid --- kern.ps_argsopen bool By setting this to 0, command line arguments are hidden for processes which you are not running. This is useful on multi-user machines where things like passwords might accidentally be added to command line programs. --- kern.quantum --- kern.random.adaptors str Displays registered PRNG adaptors. This is a read-only variable. 
--- kern.random.sys.burst --- kern.random.sys.harvest.ethernet --- kern.random.sys.harvest.interrupt --- kern.random.sys.harvest.point_to_point --- kern.random.sys.harvest.swi --- kern.random.sys.seeded --- kern.random.yarrow.bins --- kern.random.yarrow.fastthresh --- kern.random.yarrow.gengateinterval --- kern.random.yarrow.slowoverthresh --- kern.random.yarrow.slowthresh --- kern.randompid --- kern.rootdev string Displays the current root file system device. This is a read-only variable. --- kern.saved_ids bool Displays whether or not saved set-group/user ID is available. This is a read-only variable. --- kern.securelevel bool The current kernel security level. See the .Xr init 8 manual page for a good description about what a security level is. --- kern.sugid_coredump bool By default, a process that changes user or group credentials whether real or effective will not create a corefile. This behavior can be changed to generate a core dump by setting this variable to 1. --- kern.sync_on_panic bool In the event of a panic, this variable controls whether or not the system should try to .Xr sync 8 . In some circumstances, this could cause a double panic, and as a result, this may be turned off if needed. --- kern.threads.debug bool Determines whether to use debugging for kernel threads. This is useful for testing. --- kern.threads.max_groups_per_proc --- kern.threads.max_threads_hits --- kern.threads.max_threads_per_proc --- kern.threads.virtual_cpu int The maximum number of virtual CPUs that may be used for threading. --- kern.tty_nin --- kern.tty_nout --- kern.ttys bool Used internally by the .Xr pstat 8 command. This is a read-only variable. --- kern.version str Displays the current kernel version information. This is a read-only variable. --- machdep.acpi_root --- machdep.cpu_idle_hlt bool Halt idle CPUs. This is good for an SMP system.
--- machdep.disable_mtrrs --- machdep.guessed_bootdev --- machdep.hyperthreading_allowed bool Setting this tunable to zero disables the use of additional logical processors provided by Intel HTT technology. --- machdep.panic_on_nmi --- machdep.siots --- net.inet.accf.unloadable --- net.inet.icmp.bmcastecho --- net.inet.icmp.drop_redirect --- net.inet.icmp.icmplim --- net.inet.icmp.icmplim_output --- net.inet.icmp.log_redirect --- net.inet.icmp.maskfake --- net.inet.icmp.maskrepl --- net.inet.ip.accept_sourceroute bool Controls forwarding of source-routed IP packets. --- net.inet.ip.check_interface bool This .Nm verifies that packets arrive on the correct interfaces. --- net.inet.ip.fastforwarding bool When fast forwarding is enabled, IP packets are forwarded directly to the appropriate network interface with a minimal validity checking, which greatly improves throughput. Please see the .Xr inet 4 man page for more information. --- net.inet.ip.forwarding bool Act as a gateway machine and forward packets. This can also be configured using the gateway_enable value in .Pa /etc/rc.conf --- net.inet.ip.fw.one_pass int --- net.inet.ip.intr_queue_drops --- net.inet.ip.intr_queue_maxlen --- net.inet.ip.maxfragpackets --- net.inet.ip.maxfragsperpacket --- net.inet.ip.redirect bool Controls the sending of ICMP redirects in response to unforwardable IP packets. --- net.inet.ip.sourceroute bool Determines whether or not source routed IP packets should be forwarded. --- net.inet.ip.stats --- net.inet.ip.ttl int The TTL (time-to-live) to use for outgoing packets. --- net.inet.raw.maxdgram --- net.inet.raw.olddiverterror --- net.inet.raw.pcblist --- net.inet.raw.recvspace --- net.inet.tcp.always_keepalive bool Determines whether or not to attempt to detect dead TCP connections by sending 'keepalives' intermittently. 
This is enabled by default and can also be configured using the tcp_keepalive value in .Pa /etc/rc.conf --- net.inet.tcp.blackhole bool Manipulates system behavior when connection requests are received on a TCP port without a socket listening. See the .Xr blackhole 4 man page for more information. --- net.inet.tcp.delacktime --- net.inet.tcp.delayed_ack bool Historically speaking, this feature was designed to allow the acknowledgment to transmitted data to be returned along with the response. See the .Xr tuning 7 man page for more information. --- net.inet.tcp.do_tcpdrain --- net.inet.tcp.getcred --- net.inet.tcp.icmp_may_rst --- net.inet.tcp.inflight_debug bool Control debugging for the .Va net.inet.tcp.inflight_enable .Nm . Please see the .Xr tuning 7 man page for more information. --- net.inet.tcp.inflight_enable bool Turns on bandwidth delay product limiting for all TCP connections. Please see the .Xr tuning 7 man page for more information. --- net.inet.tcp.inflight_max bool .Em double check The maximum amount of data that may be queued for bandwidth delay product limiting. --- net.inet.tcp.inflight_min bool .Em double check The minimum amount of data that may be queued for bandwidth delay product limiting. --- net.inet.tcp.inflight_stab bool This parameter represents the maximal packets added to the bandwidth delay product window calculation. Changing this is not recommended. --- net.inet.tcp.isn_reseed_interval --- net.inet.tcp.local_slowstart_flightsize --- net.inet.tcp.log_in_vain bool Allows the system to log connections to TCP ports that do not have sockets listening. This variable can also be tuned by changing the value for log_in_vain in .Pa /etc/rc.conf --- net.inet.tcp.minmss bool Enable for network link optimization TCP can adjust its MSS and thus packet size according to the observed path MTU. This is done dynamically based on feedback from the remote host and network components along the packet path. 
This information can be abused to pretend an extremely low path MTU. --- net.inet.tcp.minmssoverload bool The PSS rate for the .Va net.inet.tcp.minmss sysctl. Setting this will force packets to be reset and dropped; this should hinder the availability of DoS attacks on WWW servers using POST attacks. --- net.inet.tcp.msl --- net.inet.tcp.mssdflt bool This is the default TCP Maximum Segment Size for TCP packets. The default setting is recommended in most cases. --- net.inet.tcp.v6mssdflt bool This is the default TCP Maximum Segment Size for TCP IPv6 packets. The default setting is recommended in most cases. --- net.inet.tcp.newreno --- net.inet.tcp.path_mtu_discovery --- net.inet.tcp.pcbcount --- net.inet.tcp.pcblist --- net.inet.tcp.recvspace bool This variable controls the amount of receive buffer space for any given TCP connection. This can be particularly useful when tuning network applications. See the .Xr tuning 7 man page for more information. --- net.inet.tcp.rexmit_min --- net.inet.tcp.rexmit_slop --- net.inet.tcp.rfc1323 bool Determines whether support for RFC1323 (TCP Extensions for High Performance) should be enabled. This variable can also be tuned by changing the value for tcp_extensions in .Pa /etc/rc.conf --- net.inet.tcp.rfc1644 --- net.inet.tcp.rfc3042 --- net.inet.tcp.rfc3390 --- net.inet.tcp.sendspace bool This variable controls the amount of send buffer space for any given TCP connection. This can be particularly useful when tuning network applications. See the .Xr tuning 7 manual page for more information. --- net.inet.tcp.slowstart_flightsize --- net.inet.tcp.stats --- net.inet.tcp.syncache.bucketlimit --- net.inet.tcp.syncache.cachelimit --- net.inet.tcp.syncache.count --- net.inet.tcp.syncache.hashsize --- net.inet.tcp.syncache.rexmtlimit --- net.inet.tcp.syncookies --- net.inet.tcp.tcbhashsize --- net.inet.tcp.v6mssdflt --- net.inet.udp.blackhole bool Manipulates system behavior when connection requests are received on a UDP port.
See the .Xr blackhole 4 man page for more information. --- net.inet.udp.getcred --- net.inet.udp.log_in_vain bool Allows the system to log connections to UDP ports that do not have sockets listening. This variable can also be tuned by changing the value for log_in_vain in .Pa /etc/rc.conf --- net.inet.udp.maxdgram --- net.inet.udp.pcblist --- net.inet.udp.recvspace --- net.inet.udp.stats --- net.inet6.icmp6.errppslimit --- net.inet6.icmp6.nd6_debug --- net.inet6.icmp6.nd6_delay --- net.inet6.icmp6.nd6_maxnudhint --- net.inet6.icmp6.nd6_mmaxtries --- net.inet6.icmp6.nd6_prune --- net.inet6.icmp6.nd6_umaxtries --- net.inet6.icmp6.nd6_useloopback --- net.inet6.icmp6.nodeinfo --- net.inet6.icmp6.rediraccept --- net.inet6.icmp6.redirtimeout --- net.inet6.tcp6.getcred --- net.inet6.udp6.getcred --- net.isr.enable --- net.link.ether.inet.log_arp_movements --- net.link.ether.inet.log_arp_wrong_iface --- net.link.ether.ipfw --- net.link.generic.ifdata --- net.link.generic.system.ifcount --- net.link.gif.max_nesting bool Determines whether to allow recursive tunnels or not. --- net.link.gif.parallel_tunnels bool Determines whether to allow parallel tunnels or not. --- net.local.dgram.pcblist --- net.local.stream.pcblist --- security.bsd.see_other_uids bool Turning this option on will prevent users from viewing information about processes running under other user id numbers (UIDs). --- security.bsd.suser_enabled --- security.bsd.unprivileged_proc_debug --- security.bsd.unprivileged_read_msgbuf --- security.jail.set_hostname_allowed bool Determines whether or not the root user within the jail can set the hostname. --- security.jail.socket_unixiproute_only --- security.jail.sysvipc_allowed --- security.mac.biba.enabled bool Enables enforcement of the Biba integrity policy. --- security.mac.biba.ptys_equal bool Label .Sm off .Xr pty 4 s .Sm on as .Dq biba/equal upon creation. 
--- security.mac.biba.revocation_enabled bool Revoke access to objects if the label is changed to dominate the subject. --- security.mac.enforce_fs bool Enforce MAC policies for file system accesses. --- security.mac.enforce_kld bool Enforce MAC policies on .Xr kld 4 . --- security.mac.enforce_network bool Enforce MAC policies on network interfaces. --- security.mac.enforce_pipe bool Enforce MAC policies on pipes. --- security.mac.enforce_process bool Enforce MAC policies between system processes (e.g. .Xr ps 1 , .Xr ktrace 2 ). --- security.mac.enforce_socket bool Enforce MAC policies on sockets. --- security.mac.enforce_system bool Enforce MAC policies on system-related items (e.g. .Xr kenv 1 , .Xr acct 2 , .Xr reboot 2 ). --- security.mac.enforce_vm bool Enforce MAC policies on .Xr mmap 2 and .Xr mprotect 2 . --- security.mac.ifoff.lo_enabled bool Use this to disable network traffic over the loopback .Xr lo 4 interface. See .Xr mac_ifoff 4 for more information. --- security.mac.ifoff.other_enabled bool Use this to enable network traffic over other interfaces. See .Xr mac_ifoff 4 for more information. --- security.mac.ifoff.bpfrecv_enabled bool Use this to allow .Xr bpf 4 traffic to be received, even while other traffic is disabled. --- security.mac.mls.enabled bool Enables the enforcement of the MLS confidentiality policy, see .Xr mac_mls 4 for more information. --- security.mac.mls.ptys_equal bool Label .Sm off .Xr pty 4 s .Sm on as .Dq mls/equal upon creation. --- security.mac.mls.revocation_enabled bool Revoke access to objects if the label is changed to a more sensitive level than the subject. --- security.mac.portacl.rules str The port access control list is specified in the following format: .Sy idtype .Li : .Sy id .Li : .Sy protocol .Li : .Sy port .Li [, .Sy idtype .Li : .Sy id .Li : .Sy protocol .Li : .Sy port .Li ,...] .Sy idtype Describes the type of subject match to be performed. Either .Li uid for userid matching, or .Li gid for group ID matching.
.Sy id The user or group ID (depending on .Sy idtype ) allowed to bind to the specified port. .Bf -emphasis NOTE: User and group names are not valid; only the actual ID numbers may be used. .Ef .Sy protocol Describes which protocol this entry applies to. Either .Li tcp or .Li udp are supported. .Sy port Describes which port this entry applies to. .Bf -emphasis NOTE: MAC security policies may not override other security system policies by allowing accesses that they may deny, such as .Va net.inet.ip.portrange.reservedlow / .Va net.inet.ip.portrange.reservedhigh . .Ef --- security.mac.seeotheruids.enabled bool Enable/disable .Va security.mac.seeotheruids See .Xr mac_seeotheruids 4 for more information. --- security.mac.seeotheruids.primarygroup_enabled bool Allow users to see processes and sockets owned by the same primary group. --- security.mac.seeotheruids.specificgid_enabled bool Allow processes with a specific group ID to be exempt from the policy, set this to .Li 1 and set .Va security.mac.seeotheruids.specificgid to the gid to be exempted. --- security.mac_test str Used for debugging. See .Xr mac_test 4 for more information. 
--- user.bc_base_max --- user.bc_dim_max --- user.bc_scale_max --- user.bc_string_max --- user.coll_weights_max --- user.cs_path --- user.line_max --- user.posix2_c_bind --- user.posix2_c_dev --- user.posix2_fort_dev --- user.posix2_fort_run --- user.posix2_localedef --- user.posix2_sw_dev --- user.posix2_upe --- user.posix2_version --- user.re_dup_max --- user.stream_max --- user.tzname_max --- vfs.altbufferflushes --- vfs.bufdefragcnt --- vfs.buffreekvacnt --- vfs.bufmallocspace --- vfs.bufreusecnt --- vfs.bufspace --- vfs.cache.nchstats --- vfs.conflist --- vfs.devfs.generation --- vfs.devfs.inodes --- vfs.devfs.noverflow --- vfs.devfs.topinode --- vfs.dirtybufferflushes --- vfs.dirtybufthresh --- vfs.ffs.adjblkcnt --- vfs.ffs.adjrefcnt --- vfs.ffs.freeblks --- vfs.ffs.freedirs --- vfs.ffs.freefiles --- vfs.ffs.setflags --- vfs.flushwithdeps --- vfs.getnewbufcalls --- vfs.getnewbufrestarts --- vfs.hibufspace --- vfs.hidirtybuffers --- vfs.hifreebuffers --- vfs.hirunningspace --- vfs.lobufspace --- vfs.lodirtybuffers --- vfs.lofreebuffers --- vfs.lorunningspace --- vfs.maxbufspace --- vfs.maxmallocbufspace --- vfs.numdirtybuffers --- vfs.numfreebuffers --- vfs.opv_numops --- vfs.pfs.vncache.entries --- vfs.pfs.vncache.hits --- vfs.pfs.vncache.maxentries --- vfs.pfs.vncache.misses --- vfs.read_max --- vfs.recursiveflushes --- vfs.runningbufspace --- vfs.ufs.dirhash_docheck --- vfs.ufs.dirhash_maxmem --- vfs.ufs.dirhash_mem --- vfs.ufs.dirhash_minsize --- vfs.usermount bool This .Nm allows the root user to grant access to non-root users so that they may mount floppy and CD-ROM drives. --- vfs.vmiodirenable bool Controls how directories are cached by the system. This is turned on by default. See the .Xr tuning 7 man page for a more detailed explanation on this variable. --- vfs.write_behind bool Tells the file system to issue media writes as full clusters are collected, which typically occurs when writing large sequential files. 
This is turned on by default, but under certain circumstances may stall processes and can therefore be turned off. --- vm.disable_swapspace_pageouts --- vm.dmmax --- vm.kvm_free --- vm.kvm_size --- vm.loadavg struct Displays the load average history. This is a read-only variable. --- vm.max_launder --- vm.nswapdev int Displays the number of swap devices available to the system. This is a read-only variable. --- vm.pageout_full_stats_interval --- vm.pageout_lock_miss --- vm.pageout_stats_free_max --- vm.pageout_stats_interval --- vm.pageout_stats_max --- vm.stats.sys.v_intr --- vm.stats.sys.v_soft --- vm.stats.sys.v_swtch --- vm.stats.sys.v_syscall --- vm.stats.sys.v_trap --- vm.stats.vm.v_cow_faults --- vm.stats.vm.v_cow_optim --- vm.stats.vm.v_forkpages --- vm.stats.vm.v_forks --- vm.stats.vm.v_intrans --- vm.stats.vm.v_kthreadpages --- vm.stats.vm.v_kthreads --- vm.stats.vm.v_ozfod --- vm.stats.vm.v_pdpages --- vm.stats.vm.v_pdwakeups --- vm.stats.vm.v_reactivated --- vm.stats.vm.v_rforkpages --- vm.stats.vm.v_rforks --- vm.stats.vm.v_swapin --- vm.stats.vm.v_swapout --- vm.stats.vm.v_swappgsin --- vm.stats.vm.v_swappgsout --- vm.stats.vm.v_vforkpages --- vm.stats.vm.v_vforks --- vm.stats.vm.v_vm_faults --- vm.stats.vm.v_vnodein --- vm.stats.vm.v_vnodeout --- vm.stats.vm.v_vnodepgsin --- vm.stats.vm.v_vnodepgsout --- vm.stats.vm.v_zfod --- vm.swap_async_max int The maximum number of in-progress async operations that may be performed. --- vm.swap_enabled bool Determines whether or not processes may swap. --- vm.swap_idle_enabled See .Xr tuning 7 for a detailed explanation of this .Nm . --- vm.swap_info --- vm.vmtotal string Displays virtual memory statistics which are collected at five second intervals. --- vm.zone string Shows memory used by the kernel zone allocator, by zone. This information can also be found by using the .Xr vmstat 8 command. ---