Index: lib/libc/sys/reboot.2 =================================================================== --- lib/libc/sys/reboot.2 +++ lib/libc/sys/reboot.2 @@ -82,7 +82,7 @@ .Xr savecore 8 for more information. .It Dv RB_HALT -the processor is simply halted; no reboot takes place. +The processor is simply halted; no reboot takes place. This option should be used with caution. .It Dv RB_POWEROFF After halting, the shutdown code will do what it can to turn @@ -113,6 +113,9 @@ before the processor is halted or rebooted. This option may be useful if file system changes have been made manually or if the processor is on fire. +.It Dv RB_REROOT +Instead of rebooting, unmount all filesystems except the one containing +the currently-running executable, and mount the new root filesystem. .It Dv RB_RDONLY Initially mount the root file system read-only. This is currently the default, and this option has been deprecated. Index: sbin/init/Makefile =================================================================== --- sbin/init/Makefile +++ sbin/init/Makefile @@ -2,12 +2,19 @@ # $FreeBSD$ PROG= init +SRCS= init.c +SRCS+= getmntopts.c MAN= init.8 PRECIOUSPROG= INSTALLFLAGS=-b -B.bak CFLAGS+=-DDEBUGSHELL -DSECURE -DLOGIN_CAP -DCOMPAT_SYSV_INIT LIBADD= util crypt +# Needed for getmntopts.c +MOUNT= ${.CURDIR}/../../sbin/mount +CFLAGS+=-I${MOUNT} +.PATH: ${MOUNT} + NO_SHARED?= YES .include Index: sbin/init/init.8 =================================================================== --- sbin/init/init.8 +++ sbin/init/init.8 @@ -284,6 +284,7 @@ as follows: .Bl -column Run-level SIGTERM .It Sy "Run-level Signal Action" +.It Cm 0 Ta Dv SIGUSR1 Ta "Halt" .It Cm 0 Ta Dv SIGUSR2 Ta "Halt and turn the power off" .It Cm 1 Ta Dv SIGTERM Ta "Go to single-user mode" .It Cm 6 Ta Dv SIGINT Ta "Reboot the machine" Index: sbin/init/init.c =================================================================== --- sbin/init/init.c +++ sbin/init/init.c @@ -46,6 +46,7 @@ #include #include +#include #include #include 
#include @@ -79,6 +80,7 @@ #include #endif +#include "mntopts.h" #include "pathnames.h" /* @@ -122,6 +124,8 @@ static state_func_t catatonia(void); static state_func_t death(void); static state_func_t death_single(void); +static state_func_t reroot(void); +static state_func_t reroot_phase_two(void); static state_func_t run_script(const char *); @@ -193,7 +197,7 @@ { state_t initial_transition = runcom; char kenv_value[PATH_MAX]; - int c; + int c, error; struct sigaction sa; sigset_t mask; @@ -226,6 +230,9 @@ case 'q': /* rescan /etc/ttys */ sig = SIGHUP; break; + case 'r': /* remount root */ + sig = SIGEMT; + break; default: goto invalid; } @@ -247,8 +254,13 @@ /* * Create an initial session. */ - if (setsid() < 0) - warning("initial setsid() failed: %m"); + if (setsid() < 0) { + if (errno == EPERM && getsid(0) == getpid()) { + /* Okay, we've already done setsid() before. */ + } else { + warning("initial setsid() failed: %m"); + } + } /* * Establish an initial user so that programs running @@ -261,7 +273,7 @@ * This code assumes that we always get arguments through flags, * never through bits set in some random machine register. 
*/ - while ((c = getopt(argc, argv, "dsf")) != -1) + while ((c = getopt(argc, argv, "dsfR")) != -1) switch (c) { case 'd': devfs = 1; @@ -272,6 +284,9 @@ case 'f': runcom_mode = FASTBOOT; break; + case 'R': + initial_transition = reroot_phase_two; + break; default: warning("unrecognized flag '-%c'", c); break; @@ -287,13 +302,13 @@ handle(badsys, SIGSYS, 0); handle(disaster, SIGABRT, SIGFPE, SIGILL, SIGSEGV, SIGBUS, SIGXCPU, SIGXFSZ, 0); - handle(transition_handler, SIGHUP, SIGINT, SIGTERM, SIGTSTP, SIGUSR1, - SIGUSR2, 0); + handle(transition_handler, SIGHUP, SIGINT, SIGEMT, SIGTERM, SIGTSTP, + SIGUSR1, SIGUSR2, 0); handle(alrm_handler, SIGALRM, 0); sigfillset(&mask); delset(&mask, SIGABRT, SIGFPE, SIGILL, SIGSEGV, SIGBUS, SIGSYS, - SIGXCPU, SIGXFSZ, SIGHUP, SIGINT, SIGTERM, SIGTSTP, SIGALRM, - SIGUSR1, SIGUSR2, 0); + SIGXCPU, SIGXFSZ, SIGHUP, SIGINT, SIGEMT, SIGTERM, SIGTSTP, + SIGALRM, SIGUSR1, SIGUSR2, 0); sigprocmask(SIG_SETMASK, &mask, (sigset_t *) 0); sigemptyset(&sa.sa_mask); sa.sa_flags = 0; @@ -373,6 +388,15 @@ free(s); } + if (initial_transition != reroot_phase_two) { + /* + * Unmount reroot leftovers. + */ + error = unmount(_PATH_REROOT, MNT_FORCE); + if (error != 0 && errno != EINVAL) + warning("Cannot unmount %s: %m", _PATH_REROOT); + } + /* * Start the state machine. 
*/
@@ -620,6 +644,220 @@
 {
 	write(STDERR_FILENO, message, strlen(message));
 }
+/*
+ * Read the entire file at path into a newly allocated buffer.
+ *
+ * On success returns 0 and stores the buffer and its size in *bufp and
+ * *bufsizep; the caller owns the buffer and must free(3) it.  On failure
+ * reports the problem with emergency(), releases the descriptor and the
+ * buffer, leaves *bufp and *bufsizep untouched, and returns -1.
+ */
+static int
+read_file(const char *path, void **bufp, size_t *bufsizep)
+{
+	struct stat sb;
+	size_t bufsize;
+	ssize_t nbytes;
+	void *buf;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		emergency("%s: %s", path, strerror(errno));
+		return (-1);
+	}
+
+	if (fstat(fd, &sb) != 0) {
+		emergency("fstat: %s", strerror(errno));
+		goto fail_close;
+	}
+
+	bufsize = sb.st_size;
+	buf = malloc(bufsize);
+	if (buf == NULL) {
+		emergency("malloc: %s", strerror(errno));
+		goto fail_close;
+	}
+
+	nbytes = read(fd, buf, bufsize);
+	if (nbytes != (ssize_t)bufsize) {
+		emergency("read: %s", strerror(errno));
+		goto fail_free;
+	}
+
+	if (close(fd) != 0) {
+		emergency("close: %s", strerror(errno));
+		free(buf);
+		return (-1);
+	}
+
+	*bufp = buf;
+	*bufsizep = bufsize;
+
+	return (0);
+
+fail_free:
+	free(buf);
+fail_close:
+	(void)close(fd);
+	return (-1);
+}
+
+/*
+ * Create a new file at path (failing if it already exists) and fill it
+ * with bufsize bytes from buf.
+ *
+ * The buffer stays owned by the caller and is never freed here.
+ * Returns 0 on success; on failure reports via emergency() and returns -1.
+ */
+static int
+create_file(const char *path, void *buf, size_t bufsize)
+{
+	ssize_t nbytes;
+	int fd;
+
+	fd = open(path, O_WRONLY | O_CREAT | O_EXCL, 0700);
+	if (fd < 0) {
+		emergency("%s: %s", path, strerror(errno));
+		return (-1);
+	}
+
+	nbytes = write(fd, buf, bufsize);
+	if (nbytes != (ssize_t)bufsize) {
+		emergency("write: %s", strerror(errno));
+		(void)close(fd);
+		return (-1);
+	}
+
+	if (close(fd) != 0) {
+		emergency("close: %s", strerror(errno));
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Mount a tmpfs at fspath using nmount(2).
+ *
+ * Returns 0 on success, otherwise the nmount(2) return value after
+ * reporting the failure (with the kernel's errmsg text, if any).
+ */
+static int
+mount_tmpfs(const char *fspath)
+{
+	struct iovec *iov = NULL;
+	char errmsg[255];
+	int error, iovlen = 0;
+
+	memset(errmsg, 0, sizeof(errmsg));
+
+	build_iovec(&iov, &iovlen, "fstype",
+	    __DECONST(void *, "tmpfs"), (size_t)-1);
+	build_iovec(&iov, &iovlen, "fspath",
+	    __DECONST(void *, fspath), (size_t)-1);
+	build_iovec(&iov, &iovlen, "errmsg",
+	    errmsg, sizeof(errmsg));
+
+	error = nmount(iov, iovlen, 0);
+	if (error != 0) {
+		if (*errmsg != '\0') {
+			emergency("cannot mount tmpfs on %s: %s: %s",
+			    fspath, errmsg, strerror(errno));
+		} else {
+			emergency("cannot mount tmpfs on %s: %s",
+			    fspath, strerror(errno));
+		}
+		return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * First phase of rerooting: copy init into a tmpfs and re-exec it from
+ * there with -R.  Only returns (with single_user as the next state) if
+ * something failed.
+ */
+static state_func_t
+reroot(void)
+{
+	void *buf;
+	size_t bufsize;
+	int error;
+
+	buf = NULL;
+
+	/*
+	 * Copy the init binary into tmpfs, so that we can unmount
+	 * the old rootfs without committing suicide.
+	 */
+	error = read_file(_PATH_INIT, &buf, &bufsize);
+	if (error != 0)
+		goto out;
+	error = mount_tmpfs(_PATH_REROOT);
+	if (error != 0)
+		goto out;
+	error = create_file(_PATH_REROOT_INIT, buf, bufsize);
+	if (error != 0)
+		goto out;
+
+	/*
+	 * Execute the temporary init.
+	 */
+	execl(_PATH_REROOT_INIT, _PATH_REROOT_INIT, "-R", (char *)NULL);
+	emergency("cannot exec %s: %s", _PATH_REROOT_INIT, strerror(errno));
+
+out:
+	emergency("reroot failed; going to single user mode");
+	free(buf);
+
+	/*
+	 * Make sure we don't loop when calling single_user on error.
+	 */
+	howto = RB_AUTOBOOT;
+	Reboot = FALSE;
+	return (state_func_t) single_user;
+}
+
+/*
+ * Second phase of rerooting, entered when init is started with -R:
+ * have the kernel switch root filesystems, then exec the real init.
+ * Only returns (with single_user as the next state) on failure.
+ */
+static state_func_t
+reroot_phase_two(void)
+{
+	int error;
+
+	/*
+	 * Ask the kernel to mount the new rootfs.
+	 */
+	error = reboot(RB_REROOT);
+	if (error != 0) {
+		emergency("RB_REROOT failed: %s", strerror(errno));
+		goto out;
+	}
+
+	/*
+	 * Execute init(8) from the new rootfs.
+	 *
+	 * Note that at this point, all this warning() stuff is useless
+	 * anyway; we don't have stderr nor stdout.
+	 */
+	execl(_PATH_INIT, _PATH_INIT, (char *)NULL);
+	emergency("cannot exec %s: %s", _PATH_INIT, strerror(errno));
+
+out:
+	/*
+	 * Make sure we don't loop when calling single_user on error.
+	 */
+	howto = RB_AUTOBOOT;
+	Reboot = FALSE;
+	emergency("reroot failed; going to single user mode");
+	return (state_func_t) single_user;
+}
+
 /*
  * Bring the system up single user.
*/ @@ -641,6 +843,8 @@ #ifdef DEBUGSHELL char altshell[128]; #endif + if (howto == RB_REROOT) + return (reroot()); if (Reboot) { /* Instead of going single user, let's reboot the machine */ @@ -1308,6 +1512,13 @@ howto = RB_POWEROFF; case SIGUSR1: howto |= RB_HALT; + case SIGEMT: + /* + * The cases above fall through to here with RB_HALT set; + * only a SIGEMT of its own may ask for a reroot. + */ + if ((howto & RB_HALT) == 0) + howto = RB_REROOT; case SIGINT: Reboot = TRUE; case SIGTERM: Index: sbin/init/pathnames.h =================================================================== --- sbin/init/pathnames.h +++ sbin/init/pathnames.h @@ -35,7 +35,10 @@ #include -#define _PATH_INITLOG "/var/log/init.log" -#define _PATH_SLOGGER "/sbin/session_logger" -#define _PATH_RUNCOM "/etc/rc" -#define _PATH_RUNDOWN "/etc/rc.shutdown" +#define _PATH_INITLOG "/var/log/init.log" +#define _PATH_SLOGGER "/sbin/session_logger" +#define _PATH_RUNCOM "/etc/rc" +#define _PATH_RUNDOWN "/etc/rc.shutdown" +#define _PATH_INIT "/sbin/init" +#define _PATH_REROOT "/tmp" +#define _PATH_REROOT_INIT _PATH_REROOT "/init" Index: sbin/reboot/reboot.8 =================================================================== --- sbin/reboot/reboot.8 +++ sbin/reboot/reboot.8 @@ -28,7 +28,7 @@ .\" @(#)reboot.8 8.1 (Berkeley) 6/9/93 .\" $FreeBSD$ .\" -.Dd October 11, 2010 +.Dd May 22, 2015 .Dt REBOOT 8 .Os .Sh NAME @@ -42,7 +42,7 @@ .Op Fl lnpq .Op Fl k Ar kernel .Nm -.Op Fl dlnpq +.Op Fl dlnpqr .Op Fl k Ar kernel .Nm fasthalt .Op Fl lnpq @@ -111,6 +111,12 @@ .Fl n option is not specified). This option should probably not be used. +.It Fl r +The system kills all processes, unmounts all filesystems, mounts the new +root filesystem, and begins the usual startup sequence. +It can be used after updating vfs.root.mountfrom using +.Xr kenv 8 , +to change the root filesystem while preserving kernel state. 
.El .Pp The Index: sbin/reboot/reboot.c =================================================================== --- sbin/reboot/reboot.c +++ sbin/reboot/reboot.c @@ -77,7 +77,7 @@ } else howto = 0; lflag = nflag = qflag = 0; - while ((ch = getopt(argc, argv, "dk:lnpq")) != -1) + while ((ch = getopt(argc, argv, "dk:lnpqr")) != -1) switch(ch) { case 'd': howto |= RB_DUMP; @@ -98,6 +98,9 @@ case 'q': qflag = 1; break; + case 'r': + howto |= RB_REROOT; + break; case '?': default: usage(); @@ -107,10 +110,13 @@ if ((howto & (RB_DUMP | RB_HALT)) == (RB_DUMP | RB_HALT)) errx(1, "cannot dump (-d) when halting; must reboot instead"); + /* Rerooting cannot be combined with any other howto flag. */ + if ((howto & RB_REROOT) != 0 && howto != RB_REROOT) + errx(1, "-r flag is mutually exclusive with -d, -n, and -p"); if (geteuid()) { errno = EPERM; err(1, NULL); } if (qflag) { reboot(howto); @@ -137,6 +143,9 @@ if (dohalt) { openlog("halt", 0, LOG_AUTH | LOG_CONS); syslog(LOG_CRIT, "halted by %s", user); + } else if (howto & RB_REROOT) { + openlog("reroot", 0, LOG_AUTH | LOG_CONS); + syslog(LOG_CRIT, "rerooted by %s", user); } else { openlog("reboot", 0, LOG_AUTH | LOG_CONS); syslog(LOG_CRIT, "rebooted by %s", user); @@ -170,6 +179,16 @@ */ (void)signal(SIGPIPE, SIG_IGN); + /* + * Nobody but init(8) can perform rerooting. + */ + if (howto & RB_REROOT) { + if (kill(1, SIGEMT) == -1) + err(1, "SIGEMT init"); + + return (0); + } + /* Just stop init -- if we fail, we'll restart it. */ if (kill(1, SIGTSTP) == -1) err(1, "SIGTSTP init"); Index: sys/fs/devfs/devfs_vfsops.c =================================================================== --- sys/fs/devfs/devfs_vfsops.c +++ sys/fs/devfs/devfs_vfsops.c @@ -182,6 +182,8 @@ fmp = VFSTODEVFS(mp); KASSERT(fmp->dm_mount != NULL, ("devfs_unmount unmounted devfs_mount")); + if (mntflags & MNT_FORCE) + flags |= FORCECLOSE; /* There is 1 extra root vnode reference from devfs_mount(). 
*/ error = vflush(mp, 1, flags, curthread); if (error) Index: sys/geom/geom_dev.c =================================================================== --- sys/geom/geom_dev.c +++ sys/geom/geom_dev.c @@ -358,6 +358,13 @@ #else e = 0; #endif + + /* + * This happens on attempt to open a device node with O_EXEC. + */ + if (r + w + e == 0) + return (EINVAL); + if (w) { /* * When running in very secure mode, do not allow @@ -401,6 +408,10 @@ #else e = 0; #endif + + if (r + w + e == 0) + return (EINVAL); + sc = cp->private; mtx_lock(&sc->sc_mtx); sc->sc_open += r + w + e; Index: sys/kern/kern_shutdown.c =================================================================== --- sys/kern/kern_shutdown.c +++ sys/kern/kern_shutdown.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -154,6 +155,7 @@ static void shutdown_halt(void *junk, int howto); static void shutdown_panic(void *junk, int howto); static void shutdown_reset(void *junk, int howto); +static int kern_reroot(void); /* register various local shutdown events */ static void @@ -172,9 +174,6 @@ SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); -/* - * The system call that results in a reboot. - */ /* ARGSUSED */ int sys_reboot(struct thread *td, struct reboot_args *uap) @@ -188,9 +187,17 @@ if (error == 0) error = priv_check(td, PRIV_REBOOT); if (error == 0) { - mtx_lock(&Giant); - kern_reboot(uap->opt); - mtx_unlock(&Giant); + if (uap->opt & RB_REROOT) { + mtx_lock(&Giant); + error = kern_reroot(); + mtx_unlock(&Giant); + } else { + mtx_lock(&Giant); + kern_reboot(uap->opt); + swapoff_all(); + DELAY(100000); /* wait for console output to finish */ + mtx_unlock(&Giant); + } } return (error); } @@ -454,6 +461,52 @@ } /* + * The system call that results in changing the rootfs. 
+ */ +static int +kern_reroot(void) +{ + struct vnode *oldrootvnode; + struct mount *mp; + + if (curproc->p_pid != 1) + return (EPERM); + + /* + * Remove the filesystem containing the currently-running executable + * from the mount list, to prevent it from being unmounted + * by vfs_unmountall(), and to avoid confusing vfs_mountroot(). + */ + mp = curproc->p_textvp->v_mount; + + mtx_lock(&mountlist_mtx); + TAILQ_REMOVE(&mountlist, mp, mnt_list); + mtx_unlock(&mountlist_mtx); + + oldrootvnode = rootvnode; + + /* + * Actually change the rootfs: unmount what is left, then mount root. + */ + vfs_unmountall(); + vfs_mountroot(); + + /* + * Update all references to the old rootvnode. + */ + mountcheckdirs(oldrootvnode, rootvnode); + + /* + * Add the temporary filesystem back, at the tail of the mount list. + */ + mtx_lock(&mountlist_mtx); + TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); + mtx_unlock(&mountlist_mtx); + + return (0); +} + +/* * If the shutdown was a clean halt, behave accordingly. */ static void Index: sys/kern/vfs_subr.c =================================================================== --- sys/kern/vfs_subr.c +++ sys/kern/vfs_subr.c @@ -2709,10 +2709,12 @@ * If FORCECLOSE is set, forcibly close the vnode. */ if (vp->v_usecount == 0 || (flags & FORCECLOSE)) { +#if 0 VNASSERT(vp->v_usecount == 0 || vp->v_op != &devfs_specops || (vp->v_type != VCHR && vp->v_type != VBLK), vp, ("device VNODE %p is FORCECLOSED", vp)); +#endif vgonel(vp); } else { busy++; @@ -3554,40 +3556,50 @@ void vfs_unmountall(void) { - struct mount *mp; - struct thread *td; + struct mount *mp, *devmp, *tmp; int error; CTR1(KTR_VFS, "%s: unmounting all filesystems", __func__); - td = curthread; + + devmp = NULL; /* * Since this only runs when rebooting, it is not interlocked. 
*/ - while(!TAILQ_EMPTY(&mountlist)) { - mp = TAILQ_LAST(&mountlist, mntlist); + TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, tmp) { vfs_ref(mp); - error = dounmount(mp, MNT_FORCE, td); + + /* + * Forcibly unmounting /dev before / would prevent proper + * unmount of the latter, so /dev is unmounted after the loop. + */ + if (strcmp(mp->mnt_stat.f_mntonname, "/dev") == 0) { + devmp = mp; + continue; + } + + printf("%s: unmounting %s\n", __func__, mp->mnt_stat.f_mntonname); + error = dounmount(mp, MNT_FORCE, curthread); if (error != 0) { - TAILQ_REMOVE(&mountlist, mp, mnt_list); - /* - * XXX: Due to the way in which we mount the root - * file system off of devfs, devfs will generate a - * "busy" warning when we try to unmount it before - * the root. Don't print a warning as a result in - * order to avoid false positive errors that may - * cause needless upset. - */ - if (strcmp(mp->mnt_vfc->vfc_name, "devfs") != 0) { - printf("unmount of %s failed (", - mp->mnt_stat.f_mntonname); - if (error == EBUSY) - printf("BUSY)\n"); - else - printf("%d)\n", error); - } - } else { - /* The unmount has removed mp from the mountlist */ + printf("unmount of %s failed (", + mp->mnt_stat.f_mntonname); + if (error == EBUSY) + printf("BUSY)\n"); + else + printf("%d)\n", error); + } + } + + if (devmp != NULL) { + printf("%s: unmounting %s\n", __func__, devmp->mnt_stat.f_mntonname); + error = dounmount(devmp, MNT_FORCE, curthread); + if (error != 0) { + printf("unmount of %s failed (", + devmp->mnt_stat.f_mntonname); + if (error == EBUSY) + printf("BUSY)\n"); + else + printf("%d)\n", error); } } } Index: sys/sys/reboot.h =================================================================== --- sys/sys/reboot.h +++ sys/sys/reboot.h @@ -59,6 +59,7 @@ #define RB_RESERVED1 0x40000 /* reserved for internal use of boot blocks */ #define RB_RESERVED2 0x80000 /* reserved for internal use of boot blocks */ #define RB_PAUSE 0x100000 /* pause after each output line during probe */ +#define RB_REROOT 0x200000 /* unmount 
the rootfs and mount it again */ #define RB_MULTIPLE 0x20000000 /* use multiple consoles */ #define RB_BOOTINFO 0x80000000 /* have `struct bootinfo *' arg */