Index: sbin/reboot/reboot.8 =================================================================== --- sbin/reboot/reboot.8 +++ sbin/reboot/reboot.8 @@ -28,7 +28,7 @@ .\" @(#)reboot.8 8.1 (Berkeley) 6/9/93 .\" $FreeBSD$ .\" -.Dd October 11, 2010 +.Dd May 22, 2015 .Dt REBOOT 8 .Os .Sh NAME @@ -42,7 +42,7 @@ .Op Fl lnpq .Op Fl k Ar kernel .Nm -.Op Fl dlnpq +.Op Fl dlnpqr .Op Fl k Ar kernel .Nm fasthalt .Op Fl lnpq @@ -111,6 +111,13 @@ .Fl n option is not specified). This option should probably not be used. +.It Fl r +The system will kill all processes, unmount all filesystems, mount the new +root filesystem, and start +.Xr init 8 . +It can be used after updating the vfs.root.mountfrom kenv variable using +.Xr kenv 8 , +to change the root filesystem while preserving kernel state. .El .Pp The Index: sbin/reboot/reboot.c =================================================================== --- sbin/reboot/reboot.c +++ sbin/reboot/reboot.c @@ -77,7 +77,7 @@ } else howto = 0; lflag = nflag = qflag = 0; - while ((ch = getopt(argc, argv, "dk:lnpq")) != -1) + while ((ch = getopt(argc, argv, "dk:lnpqr")) != -1) switch(ch) { case 'd': howto |= RB_DUMP; @@ -98,6 +98,9 @@ case 'q': qflag = 1; break; + case 'r': + howto |= RB_REROOT; + break; case '?': default: usage(); @@ -107,6 +110,8 @@ if ((howto & (RB_DUMP | RB_HALT)) == (RB_DUMP | RB_HALT)) errx(1, "cannot dump (-d) when halting; must reboot instead"); + if ((howto & RB_REROOT) != 0 && howto != RB_REROOT) + errx(1, "-r flag is mutually exclusive with -d, -n, and -p"); if (geteuid()) { errno = EPERM; err(1, NULL); Index: sys/kern/init_main.c =================================================================== --- sys/kern/init_main.c +++ sys/kern/init_main.c @@ -816,13 +816,13 @@ * Note special case - do not make it runnable yet. Other work * in progress will change this more. 
*/ -static void +void create_init(const void *udata __unused) { struct ucred *newcred, *oldcred; int error; - error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc, + error = fork1(&thread0, RFFDG | RFPID1 | RFPROC | RFSTOPPED, 0, &initproc, NULL, 0); if (error) panic("cannot fork init: %d\n", error); @@ -854,7 +854,7 @@ /* * Make it runnable now. */ -static void +void kick_init(const void *udata __unused) { struct thread *td; Index: sys/kern/kern_fork.c =================================================================== --- sys/kern/kern_fork.c +++ sys/kern/kern_fork.c @@ -234,7 +234,9 @@ * low-numbered pids. */ trypid = lastpid + 1; - if (flags & RFHIGHPID) { + if (flags & RFPID1) { + trypid = 1; /* RFPID1: ask for pid 1 */ + } else if (flags & RFHIGHPID) { if (trypid < 10) trypid = 10; } else { Index: sys/kern/kern_shutdown.c =================================================================== --- sys/kern/kern_shutdown.c +++ sys/kern/kern_shutdown.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,7 @@ #include #include #include +#include #include #include @@ -154,6 +156,7 @@ static void shutdown_halt(void *junk, int howto); static void shutdown_panic(void *junk, int howto); static void shutdown_reset(void *junk, int howto); +static void kern_reroot(void); /* register various local shutdown events */ static void @@ -173,6 +176,47 @@ SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); /* + * XXX: The whole point of this is to avoid 'vm_object_unwire: missing page' + * panic for random processes surviving the procedure. It's also racy. + */ +static void +exterminate(void) +{ + struct proc *p; + + KASSERT(rebooting == 0, ("don't intend to kill system processes")); + + sx_xlock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { /* SIGKILL all but pid 11 */ + if (p->p_pid == 11) { + /* + * XXX: Killing pid 11 (idle) wedges disk io. 
+ */ + continue; + } + PROC_LOCK(p); + kern_psignal(p, SIGKILL); + PROC_UNLOCK(p); + } + sx_xunlock(&allproc_lock); + + pause("exterminate", hz); /* wait, then reap the zombies below */ + +again: + sx_xlock(&proctree_lock); + sx_xlock(&allproc_lock); + while ((p = LIST_FIRST(&zombproc)) != NULL) { + PROC_LOCK(p); + sx_xunlock(&allproc_lock); + PROC_SLOCK(p); + proc_reap(&thread0, p, NULL, 0); + goto again; /* relock and rescan the zombie list */ + } + sx_xunlock(&allproc_lock); + sx_xunlock(&proctree_lock); +} + +/* * The system call that results in a reboot. */ /* ARGSUSED */ @@ -188,8 +232,15 @@ if (error == 0) error = priv_check(td, PRIV_REBOOT); if (error == 0) { - mtx_lock(&Giant); - kern_reboot(uap->opt); + if (uap->opt & RB_REROOT) { + kern_reroot(); + mtx_lock(&Giant); /* for the shared unlock below */ + } else { + mtx_lock(&Giant); + kern_reboot(uap->opt); + } + swapoff_all(); + DELAY(100000); /* wait for console output to finish */ mtx_unlock(&Giant); } return (error); @@ -453,6 +504,80 @@ /* NOTREACHED */ } +static void +kern_reroot(void) +{ + struct vnode *oldrootvnode; + struct proc *oldinitproc; + struct proc *p; + + exterminate(); + + oldrootvnode = rootvnode; + oldinitproc = initproc; + initproc = NULL; + + /* + * Undo something weird that happens in create_init(). + * XXX: Why? + */ + PROC_LOCK(oldinitproc); + LIST_REMOVE(&proc0, p_reapsibling); + PROC_UNLOCK(oldinitproc); + + /* + * Kill init, so that we can reuse PID 1. + */ + rebooting = 1; /* let SIGKILL default action reach pid 1 */ + PROC_LOCK(oldinitproc); + kern_psignal(oldinitproc, SIGKILL); + while (oldinitproc->p_state != PRS_ZOMBIE) { + PROC_UNLOCK(oldinitproc); + pause("killinit", hz); + PROC_LOCK(oldinitproc); + } + PROC_UNLOCK(oldinitproc); + rebooting = 0; + + /* + * Unmount everything, including /dev and rootfs. + */ + vfs_unmountall(); + + /* + * Start the new init; this includes mounting new + * rootfs and /dev. It's deferred to a kthread; + * wait until it completes. + */ + create_init(NULL); + kick_init(NULL); + /* + * XXX: Without exterminate(), it dies at this point + * with 'vm_object_unwire: missing page'. 
+ */ + while (rootvnode == NULL) + pause("rootvnode", 1); /* poll until the new rootfs is mounted */ + + /* + * Update all references to the old rootvnode. + */ + mountcheckdirs(oldrootvnode, rootvnode); + + /* + * Reparent old zombies to the new init, + * so that they can be properly reaped. + */ + sx_xlock(&proctree_lock); + sx_xlock(&allproc_lock); + LIST_FOREACH(p, &zombproc, p_list) { + PROC_LOCK(p); + proc_reparent(p, initproc); + PROC_UNLOCK(p); + } + sx_xunlock(&allproc_lock); + sx_xunlock(&proctree_lock); +} + /* * If the shutdown was a clean halt, behave accordingly. */ Index: sys/kern/kern_sig.c =================================================================== --- sys/kern/kern_sig.c +++ sys/kern/kern_sig.c @@ -2735,9 +2735,12 @@ case (intptr_t)SIG_DFL: /* - * Don't take default actions on system processes. + * Don't take default actions on system processes, + * but make sure to allow killing init(8) + * when rebooting. */ - if (p->p_pid <= 1) { + if (p->p_pid < 1 || + (p->p_pid == 1 && rebooting == 0)) { #ifdef DIAGNOSTIC /* * Are you sure you want to ignore SIGSEGV Index: sys/sys/kernel.h =================================================================== --- sys/sys/kernel.h +++ sys/sys/kernel.h @@ -64,6 +64,7 @@ extern int profhz; /* profiling clock's frequency */ extern int profprocs; /* number of process's profiling */ extern volatile int ticks; +struct thread; #endif /* _KERNEL */ @@ -374,4 +375,7 @@ int config_intrhook_establish(struct intr_config_hook *hook); void config_intrhook_disestablish(struct intr_config_hook *hook); +void create_init(const void *udata __unused); /* used by kern_reroot() */ +void kick_init(const void *udata __unused); + #endif /* !_SYS_KERNEL_H_*/ Index: sys/sys/reboot.h =================================================================== --- sys/sys/reboot.h +++ sys/sys/reboot.h @@ -59,6 +59,7 @@ #define RB_RESERVED1 0x40000 /* reserved for internal use of boot blocks */ #define RB_RESERVED2 0x80000 /* reserved for internal use of boot blocks */ #define RB_PAUSE 0x100000 /* pause after 
each output line during probe */ +#define RB_REROOT 0x200000 /* unmount the rootfs and mount it again */ #define RB_MULTIPLE 0x20000000 /* use multiple consoles */ #define RB_BOOTINFO 0x80000000 /* have `struct bootinfo *' arg */ Index: sys/sys/unistd.h =================================================================== --- sys/sys/unistd.h +++ sys/sys/unistd.h @@ -186,11 +186,12 @@ #define RFTSIGNUM(flags) (((flags) >> RFTSIGSHIFT) & RFTSIGMASK) #define RFTSIGFLAGS(signum) ((signum) << RFTSIGSHIFT) #define RFPROCDESC (1<<28) /* return a process descriptor */ +#define RFPID1 (1<<29) /* kernel only: try to allocate pid 1 */ #define RFPPWAIT (1<<31) /* parent sleeps until child exits (vfork) */ #define RFFLAGS (RFFDG | RFPROC | RFMEM | RFNOWAIT | RFCFDG | \ RFTHREAD | RFSIGSHARE | RFLINUXTHPN | RFSTOPPED | RFHIGHPID | RFTSIGZMB | \ - RFPROCDESC | RFPPWAIT) -#define RFKERNELONLY (RFSTOPPED | RFHIGHPID | RFPPWAIT | RFPROCDESC) + RFPROCDESC | RFPID1 | RFPPWAIT) +#define RFKERNELONLY (RFSTOPPED | RFHIGHPID | RFPPWAIT | RFPROCDESC | RFPID1) #endif /* __BSD_VISIBLE */